sonusai 0.19.9__py3-none-any.whl → 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. sonusai/calc_metric_spenh.py +265 -233
  2. sonusai/data/genmixdb.yml +4 -2
  3. sonusai/data/silero_vad_v5.1.jit +0 -0
  4. sonusai/data/silero_vad_v5.1.onnx +0 -0
  5. sonusai/doc/doc.py +14 -0
  6. sonusai/genft.py +1 -1
  7. sonusai/genmetrics.py +15 -18
  8. sonusai/genmix.py +1 -1
  9. sonusai/genmixdb.py +30 -52
  10. sonusai/ir_metric.py +555 -0
  11. sonusai/metrics_summary.py +322 -0
  12. sonusai/mixture/__init__.py +6 -2
  13. sonusai/mixture/audio.py +139 -15
  14. sonusai/mixture/augmentation.py +199 -84
  15. sonusai/mixture/config.py +9 -4
  16. sonusai/mixture/constants.py +0 -1
  17. sonusai/mixture/datatypes.py +19 -10
  18. sonusai/mixture/generation.py +52 -64
  19. sonusai/mixture/helpers.py +38 -26
  20. sonusai/mixture/ir_delay.py +63 -0
  21. sonusai/mixture/mixdb.py +190 -46
  22. sonusai/mixture/targets.py +3 -6
  23. sonusai/mixture/truth_functions/energy.py +9 -5
  24. sonusai/mixture/truth_functions/metrics.py +1 -1
  25. sonusai/mkwav.py +1 -1
  26. sonusai/onnx_predict.py +1 -1
  27. sonusai/queries/queries.py +1 -1
  28. sonusai/utils/__init__.py +2 -0
  29. sonusai/utils/asr.py +1 -1
  30. sonusai/utils/load_object.py +8 -2
  31. sonusai/utils/stratified_shuffle_split.py +1 -1
  32. sonusai/utils/temp_seed.py +13 -0
  33. {sonusai-0.19.9.dist-info → sonusai-0.20.2.dist-info}/METADATA +2 -2
  34. {sonusai-0.19.9.dist-info → sonusai-0.20.2.dist-info}/RECORD +36 -35
  35. {sonusai-0.19.9.dist-info → sonusai-0.20.2.dist-info}/WHEEL +1 -1
  36. sonusai/mixture/soundfile_audio.py +0 -130
  37. sonusai/mixture/sox_audio.py +0 -476
  38. sonusai/mixture/sox_augmentation.py +0 -136
  39. sonusai/mixture/torchaudio_audio.py +0 -106
  40. sonusai/mixture/torchaudio_augmentation.py +0 -109
  41. {sonusai-0.19.9.dist-info → sonusai-0.20.2.dist-info}/entry_points.txt +0 -0
@@ -1,106 +0,0 @@
1
- from pathlib import Path
2
-
3
- from sonusai.mixture.datatypes import AudioT
4
- from sonusai.mixture.datatypes import ImpulseResponseData
5
-
6
-
7
def read_impulse_response(
    name: str | Path,
    delay_compensation: bool = True,
    normalize: bool = True,
) -> ImpulseResponseData:
    """Load an impulse response from an audio file via torchaudio.

    Only the first channel returned by torchaudio.load() is used.

    :param name: File name (passed through tokenized_expand() before loading)
    :param delay_compensation: Discard the samples that precede the largest sample value
    :param normalize: Divide the data by its np.linalg.norm() value
    :return: ImpulseResponseData object carrying the original name, sample rate, and float32 data
    :raises OSError: If torchaudio fails to load the file
    """
    import numpy as np
    import torch
    import torchaudio

    from .tokenized_shell_vars import tokenized_expand

    expanded_name, _ = tokenized_expand(name)

    # Load the raw impulse response samples from the audio file
    try:
        waveform, sample_rate = torchaudio.load(expanded_name, backend="soundfile")
    except Exception as e:
        # Mention the expanded name only when expansion actually changed it
        shown = f"{name} (expanded: {expanded_name})" if name != expanded_name else f"{name}"
        raise OSError(f"Error reading {shown}: {e}") from e

    # Keep the first channel only
    channel = torch.squeeze(waveform[0, :])

    if delay_compensation:
        # Start the response at the index of its largest sample value
        channel = channel[torch.argmax(channel):]

    samples = np.array(channel).astype(np.float32)

    if normalize:
        # NOTE: torch.linalg.vector_norm() was observed to make multiprocessing
        # contexts hang here, so the norm is computed with NumPy instead.
        samples = samples / np.linalg.norm(samples)

    return ImpulseResponseData(name=str(name), sample_rate=sample_rate, data=samples)
52
-
53
-
54
def get_sample_rate(name: str | Path) -> int:
    """Return the sample rate that torchaudio reports for an audio file.

    :param name: File name (passed through tokenized_expand() before querying)
    :return: Sample rate
    :raises OSError: If torchaudio cannot read the file information
    """
    import torchaudio

    from .tokenized_shell_vars import tokenized_expand

    expanded_name, _ = tokenized_expand(name)

    try:
        file_info = torchaudio.info(expanded_name)
        return file_info.sample_rate
    except Exception as e:
        # Mention the expanded name only when expansion actually changed it
        shown = f"{name} (expanded: {expanded_name})" if name != expanded_name else f"{name}"
        raise OSError(f"Error reading {shown}:\n{e}") from e
73
-
74
-
75
def read_audio(name: str | Path) -> AudioT:
    """Load audio from a file with torchaudio and resample it to SAMPLE_RATE.

    Only the first channel returned by torchaudio.load() is used.

    :param name: File name (passed through tokenized_expand() before loading)
    :return: Array of time domain audio data
    :raises OSError: If loading or resampling fails
    """
    import numpy as np
    import torch
    import torchaudio

    from .constants import SAMPLE_RATE
    from .tokenized_shell_vars import tokenized_expand

    expanded_name, _ = tokenized_expand(name)

    try:
        loaded, samplerate = torchaudio.load(expanded_name, backend="soundfile")
        # Keep the first channel, shaped as (1, samples) for the resampler
        first_channel = torch.reshape(loaded[0, :], (1, loaded.size()[1]))
        resampled = torchaudio.functional.resample(
            first_channel,
            orig_freq=samplerate,
            new_freq=SAMPLE_RATE,
            resampling_method="sinc_interp_hann",
        )
    except Exception as e:
        # Mention the expanded name only when expansion actually changed it
        shown = f"{name} (expanded: {expanded_name})" if name != expanded_name else f"{name}"
        raise OSError(f"Error reading {shown}:\n{e}") from e

    # Drop the singleton channel dimension
    return np.squeeze(np.array(resampled))
@@ -1,109 +0,0 @@
1
- from sonusai.mixture.datatypes import AudioT
2
- from sonusai.mixture.datatypes import Augmentation
3
- from sonusai.mixture.datatypes import ImpulseResponseData
4
-
5
-
6
def apply_augmentation(audio: AudioT, augmentation: Augmentation, frame_length: int = 1) -> AudioT:
    """Run the requested augmentations on audio through torchaudio.sox_effects.

    Each augmentation field that is not None contributes one sox effect, in
    this order: norm, gain, pitch (followed by rate), tempo, up to three
    equalizers, lowpass.

    :param audio: Audio
    :param augmentation: Augmentation
    :param frame_length: Pad resulting audio to be a multiple of this
    :return: Augmented audio
    :raises RuntimeError: If torchaudio fails while applying the effects
    """
    import numpy as np
    import torch
    import torchaudio

    from .augmentation import pad_audio_to_frame
    from .constants import SAMPLE_RATE

    sox_chain: list[list[str]] = []

    # TODO: Always normalize and remove normalize from list of available augmentations
    # Normalize to globally set level (should this be a global config parameter, or hard-coded into the script?)
    # TODO: Support all sox effects supported by torchaudio (torchaudio.sox_effects.effect_names())
    if augmentation.normalize is not None:
        sox_chain.append(["norm", str(augmentation.normalize)])

    if augmentation.gain is not None:
        sox_chain.append(["gain", str(augmentation.gain)])

    if augmentation.pitch is not None:
        # pitch is always followed by a rate effect at SAMPLE_RATE
        sox_chain.append(["pitch", str(augmentation.pitch)])
        sox_chain.append(["rate", str(SAMPLE_RATE)])

    if augmentation.tempo is not None:
        sox_chain.append(["tempo", "-s", str(augmentation.tempo)])

    # Equalizer bands are applied in eq1, eq2, eq3 order
    for band in (augmentation.eq1, augmentation.eq2, augmentation.eq3):
        if band is not None:
            sox_chain.append(["equalizer", *map(str, band)])

    if augmentation.lpf is not None:
        sox_chain.append(["lowpass", "-2", str(augmentation.lpf), "0.707"])

    # Nothing requested: only the frame padding applies
    if not sox_chain:
        return pad_audio_to_frame(audio=audio, frame_length=frame_length)

    # 1-D input is reshaped to (1, samples) before being handed to sox
    if audio.ndim == 1:
        audio = np.reshape(audio, (1, audio.shape[0]))
    tensor_in = torch.tensor(audio)

    try:
        tensor_out, _ = torchaudio.sox_effects.apply_effects_tensor(
            tensor_in, sample_rate=SAMPLE_RATE, effects=sox_chain
        )
    except Exception as e:
        raise RuntimeError(f"Error applying {augmentation}: {e}") from e

    augmented = np.squeeze(np.array(tensor_out))

    # make sure length is multiple of frame_length
    return pad_audio_to_frame(audio=augmented, frame_length=frame_length)
67
-
68
-
69
def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
    """Apply impulse response to audio data using torchaudio.functional.fftconvolve

    The audio is resampled to the impulse response sample rate, convolved with
    the impulse response, resampled back to SAMPLE_RATE, renormalized with the
    sox "norm" effect to the peak level (in dB) measured on the input, and
    finally truncated to the input length.

    :param audio: Audio (indexed with len(), so expected to be 1-D)
    :param ir: Impulse response data
    :return: Augmented audio; the input is returned unchanged when ir is None
        or the audio contains only zeros
    """
    import numpy as np
    import torch
    import torchaudio

    from sonusai.utils import linear_to_db

    from .constants import SAMPLE_RATE

    # Early exit if no ir or if all audio is zero
    if ir is None or not audio.any():
        return audio

    # Get current maximum level in dB.
    # Vectorized peak search; the builtin max() would iterate the array
    # element by element in Python.
    max_db = linear_to_db(np.max(np.abs(audio)))

    # Convert audio to IR sample rate
    audio_in = torch.reshape(torch.tensor(audio), (1, len(audio)))
    audio_out, _ = torchaudio.sox_effects.apply_effects_tensor(
        audio_in, sample_rate=SAMPLE_RATE, effects=[["rate", str(ir.sample_rate)]]
    )

    # Apply IR and convert back to global sample rate
    rir = torch.reshape(torch.tensor(ir.data), (1, len(ir.data)))
    audio_out = torchaudio.functional.fftconvolve(audio_out, rir)
    audio_out, _ = torchaudio.sox_effects.apply_effects_tensor(
        audio_out, sample_rate=ir.sample_rate, effects=[["rate", str(SAMPLE_RATE)]]
    )

    # Reset level to previous max value
    audio_out, _ = torchaudio.sox_effects.apply_effects_tensor(
        audio_out, sample_rate=SAMPLE_RATE, effects=[["norm", str(max_db)]]
    )

    # Convolution lengthens the signal; keep only the original number of samples
    return np.squeeze(np.array(audio_out[:, : len(audio)]))