sonusai 0.19.10__py3-none-any.whl → 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,136 +0,0 @@
1
- from sonusai.mixture.datatypes import AudioT
2
- from sonusai.mixture.datatypes import Augmentation
3
- from sonusai.mixture.datatypes import ImpulseResponseData
4
-
5
-
6
def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length: int = 1) -> AudioT:
    """Run the requested SoX effects over audio, then pad to a frame boundary.

    Effects are queued in a fixed order: norm, gain, pitch (followed by a rate
    effect at SAMPLE_RATE), tempo, up to three equalizer bands, and lowpass.
    Only augmentation fields that are not None queue an effect. When nothing is
    queued, the input audio bypasses SoX and is only padded.

    :param audio: Audio
    :param augmentation: Augmentation
    :param frame_length: Pad resulting audio to be a multiple of this
    :return: Augmented audio
    :raises RuntimeError: If configuring or running the effect chain fails
    """
    from .augmentation import pad_audio_to_frame
    from .constants import BIT_DEPTH
    from .constants import CHANNEL_COUNT
    from .constants import ENCODING
    from .constants import SAMPLE_RATE
    from .sox_audio import Transformer

    try:
        # Input and output share the same global audio format
        io_format = {"rate": SAMPLE_RATE, "bits": BIT_DEPTH, "channels": CHANNEL_COUNT, "encoding": ENCODING}
        chain = Transformer()
        chain.set_input_format(**io_format)
        chain.set_output_format(**io_format)

        # Number of effects queued so far; zero means SoX is skipped entirely
        queued = 0

        # TODO
        #  Always normalize and remove normalize from list of available augmentations
        #  Normalize to globally set level (should this be a global config parameter,
        #  or hard-coded into the script?)
        level = augmentation.normalize
        if level is not None:
            chain.norm(db_level=level)
            queued += 1

        gain_db = augmentation.gain
        if gain_db is not None:
            chain.gain(gain_db=gain_db, normalize=False)
            queued += 1

        pitch = augmentation.pitch
        if pitch is not None:
            # The configured value is scaled by 1/100 before being passed as semitones
            # (presumably it is expressed in cents -- confirm against the config docs)
            chain.pitch(n_semitones=float(pitch) / 100)
            chain.rate(samplerate=SAMPLE_RATE)
            queued += 1

        tempo = augmentation.tempo
        if tempo is not None:
            chain.tempo(factor=float(tempo), audio_type="s")
            queued += 1

        # The three equalizer bands are handled identically, in eq1..eq3 order
        for band_name in ("eq1", "eq2", "eq3"):
            band = getattr(augmentation, band_name)
            if band is not None:
                chain.equalizer(*band)
                queued += 1

        cutoff = augmentation.lpf
        if cutoff is not None:
            chain.lowpass(frequency=cutoff)
            queued += 1

        audio_out = chain.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE) if queued else audio

    except Exception as e:
        raise RuntimeError(f"Error applying {augmentation}: {e}") from e

    # make sure length is multiple of frame_length
    return pad_audio_to_frame(audio=audio_out, frame_length=frame_length)
76
-
77
-
78
def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
    """Apply impulse response to audio data using SoX

    The audio is resampled to the impulse response sample rate, zero-padded on
    both sides, filtered with the SoX ``fir`` effect (coefficients are passed
    through a temporary text file), resampled back to SAMPLE_RATE, and finally
    normalized back to the peak level the input had.

    :param audio: Audio
    :param ir: Impulse response data
    :return: Augmented audio, truncated to the length of the input
    :raises RuntimeError: If SoX fails while applying the impulse response
    """
    import math
    import tempfile

    import numpy as np

    from sonusai.utils import linear_to_db

    from .constants import SAMPLE_RATE
    from .sox_audio import Transformer

    # Early exit if no ir or if all audio is zero
    if ir is None or not audio.any():
        return audio

    # Get current maximum level in dB
    max_db = linear_to_db(max(abs(audio)))

    # Convert audio to IR sample rate
    tfm = Transformer()
    tfm.set_output_format(rate=ir.sample_rate)
    audio_out = tfm.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE)

    # Pad audio to align with original and give enough room for IR tail
    pad = math.ceil(ir.length / 2)
    audio_out = np.pad(array=audio_out, pad_width=(pad, pad))

    # Write coefficients to a temporary file that SoX reads by name.
    # The context manager closes and deletes the file on every exit path; the
    # previous explicit close() followed by unlink() raised FileNotFoundError
    # because close() had already removed the file (delete-on-close default).
    with tempfile.NamedTemporaryFile(mode="w+t") as temp:
        temp.writelines(f"{d:f}\n" for d in ir.data)
        # Make sure the coefficients are on disk before SoX opens the file
        temp.flush()

        # Apply IR and convert back to global sample rate
        tfm = Transformer()
        tfm.set_output_format(rate=SAMPLE_RATE)
        tfm.fir(coefficients=temp.name)  # pyright: ignore [reportArgumentType]
        try:
            audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=ir.sample_rate)
        except Exception as e:
            raise RuntimeError(f"Error applying IR: {e}") from e

    # Reset level to previous max value
    tfm = Transformer()
    tfm.norm(db_level=max_db)
    audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)

    return audio_out[: len(audio)]
@@ -1,106 +0,0 @@
1
- from pathlib import Path
2
-
3
- from sonusai.mixture.datatypes import AudioT
4
- from sonusai.mixture.datatypes import ImpulseResponseData
5
-
6
-
7
def read_impulse_response(
    name: str | Path,
    delay_compensation: bool = True,
    normalize: bool = True,
) -> ImpulseResponseData:
    """Load an impulse response from an audio file using torchaudio

    Only the first channel of the file is used. With delay compensation the
    samples before the maximum-valued sample are dropped; with normalization
    the data is divided by its vector norm.

    :param name: File name
    :param delay_compensation: Apply delay compensation
    :param normalize: Apply normalization
    :return: ImpulseResponseData object
    :raises OSError: If the file cannot be read
    """
    import numpy as np
    import torch
    import torchaudio

    from .tokenized_shell_vars import tokenized_expand

    path, _ = tokenized_expand(name)

    # Read impulse response data from audio file
    try:
        waveform, sample_rate = torchaudio.load(path, backend="soundfile")
    except Exception as e:
        # Mention the expanded path only when expansion actually changed the name
        expansion = f" (expanded: {path})" if name != path else ""
        raise OSError(f"Error reading {name}{expansion}: {e}") from e

    # Keep the first channel only
    channel = waveform[0, :].squeeze()

    if delay_compensation:
        # Start the response at its maximum-valued sample
        channel = channel[torch.argmax(channel):]

    data = np.array(channel).astype(np.float32)

    if normalize:
        # Inexplicably,
        #   data = data / torch.linalg.vector_norm(data)
        # causes multiprocessing contexts to hang.
        # Use np.linalg.norm() instead.
        scale = np.linalg.norm(data)
        data = data / scale

    return ImpulseResponseData(name=str(name), sample_rate=sample_rate, data=data)
52
-
53
-
54
def get_sample_rate(name: str | Path) -> int:
    """Query the sample rate of an audio file using torchaudio

    :param name: File name (expanded with tokenized_expand before use)
    :return: Sample rate
    :raises OSError: If the file metadata cannot be read
    """
    import torchaudio

    from .tokenized_shell_vars import tokenized_expand

    path, _ = tokenized_expand(name)

    try:
        rate = torchaudio.info(path).sample_rate
    except Exception as e:
        # Mention the expanded path only when expansion actually changed the name
        expansion = f" (expanded: {path})" if name != path else ""
        raise OSError(f"Error reading {name}{expansion}:\n{e}") from e
    else:
        return rate
73
-
74
-
75
def read_audio(name: str | Path) -> AudioT:
    """Load the first channel of an audio file and resample it to SAMPLE_RATE

    :param name: File name (expanded with tokenized_expand before use)
    :return: Array of time domain audio data
    :raises OSError: If the file cannot be read or resampled
    """
    import numpy as np
    import torch
    import torchaudio

    from .constants import SAMPLE_RATE
    from .tokenized_shell_vars import tokenized_expand

    path, _ = tokenized_expand(name)

    try:
        waveform, file_rate = torchaudio.load(path, backend="soundfile")
        # Keep only the first channel, as a (1, samples) tensor for the resampler
        mono = torch.unsqueeze(waveform[0, :], 0)
        resampled = torchaudio.functional.resample(
            mono,
            orig_freq=file_rate,
            new_freq=SAMPLE_RATE,
            resampling_method="sinc_interp_hann",
        )
    except Exception as e:
        # Mention the expanded path only when expansion actually changed the name
        expansion = f" (expanded: {path})" if name != path else ""
        raise OSError(f"Error reading {name}{expansion}:\n{e}") from e

    # Drop the singleton channel dimension
    return np.array(resampled).squeeze()
@@ -1,109 +0,0 @@
1
- from sonusai.mixture.datatypes import AudioT
2
- from sonusai.mixture.datatypes import Augmentation
3
- from sonusai.mixture.datatypes import ImpulseResponseData
4
-
5
-
6
def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length: int = 1) -> AudioT:
    """Run the requested effects over audio with torchaudio.sox_effects, then pad

    Effects are queued in a fixed order: norm, gain, pitch (followed by a rate
    effect at SAMPLE_RATE), tempo, up to three equalizer bands, and lowpass.
    Only augmentation fields that are not None queue an effect. When nothing is
    queued, the input audio is only padded.

    :param audio: Audio
    :param augmentation: Augmentation
    :param frame_length: Pad resulting audio to be a multiple of this
    :return: Augmented audio
    :raises RuntimeError: If torchaudio fails to apply the effect chain
    """
    import numpy as np
    import torch
    import torchaudio

    from .augmentation import pad_audio_to_frame
    from .constants import SAMPLE_RATE

    chain: list[list[str]] = []

    # TODO: Always normalize and remove normalize from list of available augmentations
    #  Normalize to globally set level (should this be a global config parameter, or hard-coded into the script?)
    # TODO: Support all sox effects supported by torchaudio (torchaudio.sox_effects.effect_names())
    level = augmentation.normalize
    if level is not None:
        chain.append(["norm", str(level)])

    gain_db = augmentation.gain
    if gain_db is not None:
        chain.append(["gain", str(gain_db)])

    pitch = augmentation.pitch
    if pitch is not None:
        # A rate effect follows pitch to bring the stream back to SAMPLE_RATE
        chain += [["pitch", str(pitch)], ["rate", str(SAMPLE_RATE)]]

    tempo = augmentation.tempo
    if tempo is not None:
        chain.append(["tempo", "-s", str(tempo)])

    # The three equalizer bands are handled identically, in eq1..eq3 order
    for band_name in ("eq1", "eq2", "eq3"):
        band = getattr(augmentation, band_name)
        if band is not None:
            chain.append(["equalizer", *map(str, band)])

    cutoff = augmentation.lpf
    if cutoff is not None:
        chain.append(["lowpass", "-2", str(cutoff), "0.707"])

    if not chain:
        # Nothing to apply; make sure length is multiple of frame_length
        return pad_audio_to_frame(audio=audio, frame_length=frame_length)

    # 1-D input is reshaped to a single-row 2-D array before conversion to a tensor
    if audio.ndim == 1:
        audio = np.reshape(audio, (1, audio.shape[0]))
    tensor = torch.tensor(audio)

    try:
        tensor, _ = torchaudio.sox_effects.apply_effects_tensor(tensor, sample_rate=SAMPLE_RATE, effects=chain)
    except Exception as e:
        raise RuntimeError(f"Error applying {augmentation}: {e}") from e

    audio_out = np.array(tensor).squeeze()

    # make sure length is multiple of frame_length
    return pad_audio_to_frame(audio=audio_out, frame_length=frame_length)
67
-
68
-
69
def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
    """Convolve audio with an impulse response using torchaudio.fftconvolve

    The audio is resampled to the impulse response sample rate, convolved with
    the impulse response, resampled back to SAMPLE_RATE, and normalized back to
    the peak level the input had.

    :param audio: Audio
    :param ir: Impulse response data
    :return: Augmented audio, truncated to the length of the input
    """
    import numpy as np
    import torch
    import torchaudio

    from sonusai.utils import linear_to_db

    from .constants import SAMPLE_RATE

    # Early exit if no ir or if all audio is zero
    if ir is None or not audio.any():
        return audio

    def run_sox(tensor, rate, effect):
        """Apply a single sox effect and return only the resulting tensor."""
        result, _ = torchaudio.sox_effects.apply_effects_tensor(tensor, sample_rate=rate, effects=[effect])
        return result

    n_samples = len(audio)

    # Get current maximum level in dB
    max_db = linear_to_db(max(abs(audio)))

    # Convert audio to IR sample rate
    signal = torch.tensor(audio).reshape(1, n_samples)
    signal = run_sox(signal, SAMPLE_RATE, ["rate", str(ir.sample_rate)])

    # Apply IR and convert back to global sample rate
    rir = torch.tensor(ir.data).reshape(1, len(ir.data))
    signal = torchaudio.functional.fftconvolve(signal, rir)
    signal = run_sox(signal, ir.sample_rate, ["rate", str(SAMPLE_RATE)])

    # Reset level to previous max value
    signal = run_sox(signal, SAMPLE_RATE, ["norm", str(max_db)])

    # Trim to the input length and drop the singleton channel dimension
    return np.array(signal[:, :n_samples]).squeeze()