PyPI - sonusai - Versions diffs - 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl - Mend

sonusai-0.19.10-py3-none-any.whl → sonusai-0.20.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

sonusai/data/genmixdb.yml +4 -2
sonusai/doc/doc.py +14 -0
sonusai/ir_metric.py +555 -0
sonusai/metrics_summary.py +5 -3
sonusai/mixture/__init__.py +4 -1
sonusai/mixture/audio.py +103 -12
sonusai/mixture/augmentation.py +199 -84
sonusai/mixture/config.py +9 -4
sonusai/mixture/constants.py +0 -1
sonusai/mixture/datatypes.py +19 -10
sonusai/mixture/generation.py +11 -12
sonusai/mixture/helpers.py +20 -23
sonusai/mixture/ir_delay.py +63 -0
sonusai/mixture/mixdb.py +103 -19
sonusai/mixture/targets.py +3 -6
sonusai/utils/__init__.py +2 -0
sonusai/utils/temp_seed.py +13 -0
{sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/METADATA +2 -2
{sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/RECORD +21 -23
{sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/WHEEL +1 -1
sonusai/mixture/soundfile_audio.py +0 -130
sonusai/mixture/sox_audio.py +0 -476
sonusai/mixture/sox_augmentation.py +0 -136
sonusai/mixture/torchaudio_audio.py +0 -106
sonusai/mixture/torchaudio_augmentation.py +0 -109
{sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/entry_points.txt +0 -0

sonusai/mixture/sox_augmentation.py DELETED Viewed

@@ -1,136 +0,0 @@
-from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import Augmentation
-from sonusai.mixture.datatypes import ImpulseResponseData
-def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length: int = 1) -> AudioT:
-    """Run the SoX effects selected by an augmentation over audio, then pad to a frame boundary
-    Every augmentation field that is not None queues one effect, in the fixed order
-    normalize, gain, pitch, tempo, eq1, eq2, eq3, lpf. When nothing is queued the
-    input audio is passed to the padding step untouched.
-    :param audio: Audio
-    :param augmentation: Augmentation whose non-None fields select the effects
-    :param frame_length: Pad resulting audio to be a multiple of this
-    :return: Augmented audio
-    :raises RuntimeError: If building or running the effects chain fails
-    """
-    from .augmentation import pad_audio_to_frame
-    from .constants import BIT_DEPTH
-    from .constants import CHANNEL_COUNT
-    from .constants import ENCODING
-    from .constants import SAMPLE_RATE
-    from .sox_audio import Transformer
-    try:
-        # Input and output share the same global format
-        io_format = {"rate": SAMPLE_RATE, "bits": BIT_DEPTH, "channels": CHANNEL_COUNT, "encoding": ENCODING}
-        chain = Transformer()
-        chain.set_input_format(**io_format)
-        chain.set_output_format(**io_format)
-        # Count of queued effects; the chain is only executed when at least one was queued
-        queued = 0
-        # TODO
-        #  Always normalize and remove normalize from list of available augmentations
-        #  Normalize to globally set level (should this be a global config parameter,
-        #  or hard-coded into the script?)
-        if augmentation.normalize is not None:
-            chain.norm(db_level=augmentation.normalize)
-            queued += 1
-        if augmentation.gain is not None:
-            chain.gain(gain_db=augmentation.gain, normalize=False)
-            queued += 1
-        if augmentation.pitch is not None:
-            # The pitch value is divided by 100 to obtain semitones
-            chain.pitch(n_semitones=float(augmentation.pitch) / 100)
-            chain.rate(samplerate=SAMPLE_RATE)
-            queued += 1
-        if augmentation.tempo is not None:
-            chain.tempo(factor=float(augmentation.tempo), audio_type="s")
-            queued += 1
-        # The three equalizer slots are handled identically, in order
-        for band in (augmentation.eq1, augmentation.eq2, augmentation.eq3):
-            if band is not None:
-                chain.equalizer(*band)
-                queued += 1
-        if augmentation.lpf is not None:
-            chain.lowpass(frequency=augmentation.lpf)
-            queued += 1
-        audio_out = chain.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE) if queued else audio
-    except Exception as e:
-        raise RuntimeError(f"Error applying {augmentation}: {e}") from e
-    # make sure length is multiple of frame_length
-    return pad_audio_to_frame(audio=audio_out, frame_length=frame_length)
-def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
-    """Apply impulse response to audio data using SoX
-    The audio is converted to the IR sample rate, zero padded on both sides, filtered
-    with the IR coefficients through the fir effect, converted back to SAMPLE_RATE and
-    finally normalized to the peak level the input had.
-    :param audio: Audio
-    :param ir: Impulse response data; None returns the audio unchanged
-    :return: Augmented audio, truncated to the length of the input
-    :raises RuntimeError: If applying the IR fails
-    """
-    import math
-    import tempfile
-    import numpy as np
-    from sonusai.utils import linear_to_db
-    from .constants import SAMPLE_RATE
-    from .sox_audio import Transformer
-    # Early exit if no ir or if all audio is zero
-    if ir is None or not audio.any():
-        return audio
-    # Get current maximum level in dB
-    max_db = linear_to_db(np.max(np.abs(audio)))
-    # Convert audio to IR sample rate
-    tfm = Transformer()
-    tfm.set_output_format(rate=ir.sample_rate)
-    audio_out = tfm.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE)
-    # Pad audio to align with original and give enough room for IR tail
-    pad = math.ceil(ir.length / 2)
-    audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
-    # Write coefficients to a temporary file that the context manager closes and deletes.
-    # The file is created with the default delete=True, so no manual unlink is needed;
-    # unlinking after close() raised FileNotFoundError, and an error in build_array
-    # previously skipped the close entirely.
-    with tempfile.NamedTemporaryFile(mode="w+t") as temp:
-        temp.writelines(f"{d:f}\n" for d in ir.data)
-        # Push buffered coefficients to disk before the file name is handed to the fir effect
-        temp.flush()
-        # Apply IR and convert back to global sample rate
-        tfm = Transformer()
-        tfm.set_output_format(rate=SAMPLE_RATE)
-        tfm.fir(coefficients=temp.name)  # pyright: ignore [reportArgumentType]
-        try:
-            audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=ir.sample_rate)
-        except Exception as e:
-            raise RuntimeError(f"Error applying IR: {e}") from e
-    # Reset level to previous max value
-    tfm = Transformer()
-    tfm.norm(db_level=max_db)
-    audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
-    return audio_out[: len(audio)]

sonusai/mixture/torchaudio_audio.py DELETED Viewed

@@ -1,106 +0,0 @@
-from pathlib import Path
-from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import ImpulseResponseData
-def read_impulse_response(
-    name: str | Path,
-    delay_compensation: bool = True,
-    normalize: bool = True,
-) -> ImpulseResponseData:
-    """Load the first channel of an audio file as impulse response data using torchaudio
-    :param name: File name; it is passed through tokenized_expand before loading
-    :param delay_compensation: Drop every sample that precedes the maximum-valued sample
-    :param normalize: Divide the data by its vector norm (np.linalg.norm)
-    :return: ImpulseResponseData holding str(name), the file's sample rate and float32 data
-    :raises OSError: If the file cannot be loaded
-    """
-    import numpy as np
-    import torch
-    import torchaudio
-    from .tokenized_shell_vars import tokenized_expand
-    expanded_name, _ = tokenized_expand(name)
-    try:
-        samples, sample_rate = torchaudio.load(expanded_name, backend="soundfile")
-    except Exception as e:
-        # Only mention the expanded name when expansion actually changed it
-        if name == expanded_name:
-            raise OSError(f"Error reading {name}: {e}") from e
-        raise OSError(f"Error reading {name} (expanded: {expanded_name}): {e}") from e
-    # Keep the first channel only
-    samples = torch.squeeze(samples[0, :])
-    if delay_compensation:
-        # NOTE(review): argmax picks the largest signed sample, not the largest magnitude - confirm intended
-        samples = samples[torch.argmax(samples) :]
-    data = np.array(samples).astype(np.float32)
-    if normalize:
-        # np.linalg.norm() is deliberate: inexplicably,
-        #   data = data / torch.linalg.vector_norm(data)
-        # causes multiprocessing contexts to hang.
-        data = data / np.linalg.norm(data)
-    return ImpulseResponseData(name=str(name), sample_rate=sample_rate, data=data)
-def get_sample_rate(name: str | Path) -> int:
-    """Report the sample rate of an audio file as seen by torchaudio.info
-    :param name: File name; it is passed through tokenized_expand before the lookup
-    :return: Sample rate
-    :raises OSError: If the file metadata cannot be read
-    """
-    import torchaudio
-    from .tokenized_shell_vars import tokenized_expand
-    expanded_name, _ = tokenized_expand(name)
-    try:
-        metadata = torchaudio.info(expanded_name)
-        return metadata.sample_rate
-    except Exception as e:
-        # Only mention the expanded name when expansion actually changed it
-        if name == expanded_name:
-            raise OSError(f"Error reading {name}:\n{e}") from e
-        raise OSError(f"Error reading {name} (expanded: {expanded_name}):\n{e}") from e
-def read_audio(name: str | Path) -> AudioT:
-    """Load the first channel of an audio file with torchaudio and resample it to SAMPLE_RATE
-    :param name: File name; it is passed through tokenized_expand before loading
-    :return: Array of time domain audio data
-    :raises OSError: If loading or resampling fails
-    """
-    import numpy as np
-    import torch
-    import torchaudio
-    from .constants import SAMPLE_RATE
-    from .tokenized_shell_vars import tokenized_expand
-    expanded_name, _ = tokenized_expand(name)
-    try:
-        waveform, file_rate = torchaudio.load(expanded_name, backend="soundfile")
-        # Keep channel 0 only, as a (1, samples) tensor
-        first_channel = waveform[0, :].unsqueeze(0)
-        resampled = torchaudio.functional.resample(
-            first_channel,
-            orig_freq=file_rate,
-            new_freq=SAMPLE_RATE,
-            resampling_method="sinc_interp_hann",
-        )
-    except Exception as e:
-        # Only mention the expanded name when expansion actually changed it
-        if name == expanded_name:
-            raise OSError(f"Error reading {name}:\n{e}") from e
-        raise OSError(f"Error reading {name} (expanded: {expanded_name}):\n{e}") from e
-    return np.array(resampled).squeeze()

sonusai/mixture/torchaudio_augmentation.py DELETED Viewed

@@ -1,109 +0,0 @@
-from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import Augmentation
-from sonusai.mixture.datatypes import ImpulseResponseData
-def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length: int = 1) -> AudioT:
-    """Run the effects selected by an augmentation through torchaudio.sox_effects, then pad to a frame boundary
-    Every augmentation field that is not None contributes one effect, in the fixed order
-    normalize, gain, pitch, tempo, eq1, eq2, eq3, lpf. When no effect is selected the
-    input audio is passed to the padding step untouched.
-    :param audio: Audio
-    :param augmentation: Augmentation whose non-None fields select the effects
-    :param frame_length: Pad resulting audio to be a multiple of this
-    :return: Augmented audio
-    :raises RuntimeError: If applying the effects fails
-    """
-    import numpy as np
-    import torch
-    import torchaudio
-    from .augmentation import pad_audio_to_frame
-    from .constants import SAMPLE_RATE
-    # TODO: Always normalize and remove normalize from list of available augmentations
-    # Normalize to globally set level (should this be a global config parameter, or hard-coded into the script?)
-    # TODO: Support all sox effects supported by torchaudio (torchaudio.sox_effects.effect_names())
-    effects: list[list[str]] = []
-    if augmentation.normalize is not None:
-        effects.append(["norm", str(augmentation.normalize)])
-    if augmentation.gain is not None:
-        effects.append(["gain", str(augmentation.gain)])
-    if augmentation.pitch is not None:
-        # A pitch effect is always followed by a rate effect at SAMPLE_RATE
-        effects += [["pitch", str(augmentation.pitch)], ["rate", str(SAMPLE_RATE)]]
-    if augmentation.tempo is not None:
-        effects.append(["tempo", "-s", str(augmentation.tempo)])
-    # The three equalizer slots are handled identically, in order
-    for band in (augmentation.eq1, augmentation.eq2, augmentation.eq3):
-        if band is not None:
-            effects.append(["equalizer", *map(str, band)])
-    if augmentation.lpf is not None:
-        effects.append(["lowpass", "-2", str(augmentation.lpf), "0.707"])
-    if not effects:
-        # Nothing to apply; only make sure length is multiple of frame_length
-        return pad_audio_to_frame(audio=audio, frame_length=frame_length)
-    # One-dimensional audio is reshaped to a single row before conversion to a tensor
-    if audio.ndim == 1:
-        audio = np.reshape(audio, (1, audio.shape[0]))
-    tensor_in = torch.tensor(audio)
-    try:
-        tensor_out, _ = torchaudio.sox_effects.apply_effects_tensor(
-            tensor_in, sample_rate=SAMPLE_RATE, effects=effects
-        )
-    except Exception as e:
-        raise RuntimeError(f"Error applying {augmentation}: {e}") from e
-    # make sure length is multiple of frame_length
-    return pad_audio_to_frame(audio=np.squeeze(np.array(tensor_out)), frame_length=frame_length)
-def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
-    """Convolve audio with an impulse response using torchaudio.fftconvolve
-    The audio is converted to the IR sample rate, convolved with the IR data, converted
-    back to SAMPLE_RATE and normalized to the peak level the input had.
-    :param audio: Audio
-    :param ir: Impulse response data; None returns the audio unchanged
-    :return: Augmented audio, truncated to the length of the input
-    """
-    import numpy as np
-    import torch
-    import torchaudio
-    from sonusai.utils import linear_to_db
-    from .constants import SAMPLE_RATE
-    def _run_effect(signal, rate, effect):
-        # Apply a single sox effect and discard the returned sample rate
-        processed, _ = torchaudio.sox_effects.apply_effects_tensor(signal, sample_rate=rate, effects=[effect])
-        return processed
-    # Early exit if no ir or if all audio is zero
-    if ir is None or not audio.any():
-        return audio
-    # Peak level of the input in dB, restored at the end
-    peak_db = linear_to_db(max(abs(audio)))
-    num_samples = len(audio)
-    # Convert audio to IR sample rate
-    signal = torch.reshape(torch.tensor(audio), (1, num_samples))
-    signal = _run_effect(signal, SAMPLE_RATE, ["rate", str(ir.sample_rate)])
-    # Apply IR and convert back to global sample rate
-    kernel = torch.reshape(torch.tensor(ir.data), (1, len(ir.data)))
-    signal = torchaudio.functional.fftconvolve(signal, kernel)
-    signal = _run_effect(signal, ir.sample_rate, ["rate", str(SAMPLE_RATE)])
-    # Reset level to previous max value
-    signal = _run_effect(signal, SAMPLE_RATE, ["norm", str(peak_db)])
-    return np.squeeze(np.array(signal[:, :num_samples]))

{sonusai-0.19.10.dist-info → sonusai-0.20.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

sonusai 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl

sonusai-0.19.10-py3-none-any.whl → sonusai-0.20.2-py3-none-any.whl