sonusai 0.18.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +1 -0
- sonusai/audiofe.py +1 -1
- sonusai/calc_metric_spenh.py +32 -362
- sonusai/data/genmixdb.yml +2 -0
- sonusai/doc/doc.py +45 -4
- sonusai/genmetrics.py +137 -109
- sonusai/lsdb.py +2 -2
- sonusai/metrics/__init__.py +4 -0
- sonusai/metrics/calc_audio_stats.py +42 -0
- sonusai/metrics/calc_pesq.py +12 -8
- sonusai/metrics/calc_phase_distance.py +43 -0
- sonusai/metrics/calc_snr_f.py +34 -0
- sonusai/metrics/calc_speech.py +312 -0
- sonusai/metrics/calc_wer.py +2 -3
- sonusai/metrics/calc_wsdr.py +0 -59
- sonusai/mixture/__init__.py +3 -2
- sonusai/mixture/audio.py +6 -5
- sonusai/mixture/config.py +13 -0
- sonusai/mixture/constants.py +1 -0
- sonusai/mixture/datatypes.py +33 -0
- sonusai/mixture/generation.py +6 -2
- sonusai/mixture/mixdb.py +261 -122
- sonusai/mixture/soundfile_audio.py +8 -6
- sonusai/mixture/sox_audio.py +16 -13
- sonusai/mixture/torchaudio_audio.py +6 -4
- sonusai/mixture/truth_functions/energy.py +40 -28
- sonusai/mixture/truth_functions/target.py +0 -1
- sonusai/utils/__init__.py +1 -1
- sonusai/utils/asr.py +26 -39
- sonusai/utils/asr_functions/aaware_whisper.py +3 -3
- {sonusai-0.18.2.dist-info → sonusai-0.18.4.dist-info}/METADATA +1 -1
- {sonusai-0.18.2.dist-info → sonusai-0.18.4.dist-info}/RECORD +34 -31
- sonusai/mixture/mapped_snr_f.py +0 -100
- {sonusai-0.18.2.dist-info → sonusai-0.18.4.dist-info}/WHEEL +0 -0
- {sonusai-0.18.2.dist-info → sonusai-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,12 +1,11 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
1
3
|
from sonusai.mixture.datatypes import Truth
|
2
4
|
from sonusai.mixture.truth_functions.data import Data
|
3
5
|
|
4
6
|
|
5
7
|
def _core(data: Data, mapped: bool, snr: bool) -> Truth:
|
6
|
-
import numpy as np
|
7
|
-
|
8
8
|
from sonusai import SonusAIError
|
9
|
-
from sonusai.mixture import calculate_mapped_snr_f
|
10
9
|
from sonusai.utils import compute_energy_f
|
11
10
|
|
12
11
|
snr_db_mean = None
|
@@ -52,7 +51,7 @@ def _core(data: Data, mapped: bool, snr: bool) -> Truth:
|
|
52
51
|
tmp = np.nan_to_num(tmp, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
|
53
52
|
|
54
53
|
if mapped:
|
55
|
-
tmp =
|
54
|
+
tmp = _calculate_mapped_snr_f(tmp, snr_db_mean, snr_db_std)
|
56
55
|
|
57
56
|
for index in data.zero_based_indices:
|
58
57
|
data.truth[offset:offset + data.frame_size, index:index + data.target_fft.bins] = tmp
|
@@ -60,16 +59,31 @@ def _core(data: Data, mapped: bool, snr: bool) -> Truth:
|
|
60
59
|
return data.truth
|
61
60
|
|
62
61
|
|
62
|
+
def _calculate_mapped_snr_f(truth_f: np.ndarray, snr_db_mean: np.ndarray, snr_db_std: np.ndarray) -> np.ndarray:
|
63
|
+
"""Calculate mapped SNR from standard SNR energy per bin/class."""
|
64
|
+
import scipy.special as sc
|
65
|
+
|
66
|
+
old_err = np.seterr(divide='ignore', invalid='ignore')
|
67
|
+
num = 10 * np.log10(np.double(truth_f)) - np.double(snr_db_mean)
|
68
|
+
den = np.double(snr_db_std) * np.sqrt(2)
|
69
|
+
q = num / den
|
70
|
+
q = np.nan_to_num(q, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
|
71
|
+
result = 0.5 * (1 + sc.erf(q))
|
72
|
+
np.seterr(**old_err)
|
73
|
+
|
74
|
+
return result.astype(np.float32)
|
75
|
+
|
76
|
+
|
63
77
|
def energy_f(data: Data) -> Truth:
|
64
78
|
"""Frequency domain energy truth generation function
|
65
79
|
|
66
|
-
Calculates the true energy per bin:
|
80
|
+
Calculates the true energy per bin:
|
67
81
|
|
68
|
-
Ti^2 + Tr^2
|
82
|
+
Ti^2 + Tr^2
|
69
83
|
|
70
|
-
where T is the target STFT bin values.
|
84
|
+
where T is the target STFT bin values.
|
71
85
|
|
72
|
-
Output shape: [:, bins]
|
86
|
+
Output shape: [:, bins]
|
73
87
|
"""
|
74
88
|
return _core(data=data, mapped=False, snr=False)
|
75
89
|
|
@@ -77,13 +91,13 @@ Output shape: [:, bins]
|
|
77
91
|
def snr_f(data: Data) -> Truth:
|
78
92
|
"""Frequency domain SNR truth function documentation
|
79
93
|
|
80
|
-
Calculates the true SNR per bin:
|
94
|
+
Calculates the true SNR per bin:
|
81
95
|
|
82
|
-
(Ti^2 + Tr^2) / (Ni^2 + Nr^2)
|
96
|
+
(Ti^2 + Tr^2) / (Ni^2 + Nr^2)
|
83
97
|
|
84
|
-
where T is the target and N is the noise STFT bin values.
|
98
|
+
where T is the target and N is the noise STFT bin values.
|
85
99
|
|
86
|
-
Output shape: [:, bins]
|
100
|
+
Output shape: [:, bins]
|
87
101
|
"""
|
88
102
|
return _core(data=data, mapped=False, snr=True)
|
89
103
|
|
@@ -91,7 +105,7 @@ Output shape: [:, bins]
|
|
91
105
|
def mapped_snr_f(data: Data) -> Truth:
|
92
106
|
"""Frequency domain mapped SNR truth function documentation
|
93
107
|
|
94
|
-
Output shape: [:, bins]
|
108
|
+
Output shape: [:, bins]
|
95
109
|
"""
|
96
110
|
return _core(data=data, mapped=True, snr=True)
|
97
111
|
|
@@ -99,27 +113,25 @@ Output shape: [:, bins]
|
|
99
113
|
def energy_t(data: Data) -> Truth:
|
100
114
|
"""Time domain energy truth function documentation
|
101
115
|
|
102
|
-
Calculates the true time domain energy of each frame:
|
116
|
+
Calculates the true time domain energy of each frame:
|
103
117
|
|
104
|
-
For OLS:
|
105
|
-
|
118
|
+
For OLS:
|
119
|
+
sum(x[0:N-1]^2) / N
|
106
120
|
|
107
|
-
For OLA:
|
108
|
-
|
121
|
+
For OLA:
|
122
|
+
sum(x[0:R-1]^2) / R
|
109
123
|
|
110
|
-
where x is the target time domain data,
|
111
|
-
N is the size of the transform, and
|
112
|
-
R is the number of new samples in the frame.
|
124
|
+
where x is the target time domain data,
|
125
|
+
N is the size of the transform, and
|
126
|
+
R is the number of new samples in the frame.
|
113
127
|
|
114
|
-
Output shape: [:, 1]
|
128
|
+
Output shape: [:, 1]
|
115
129
|
|
116
|
-
Note: feature transforms can be defined to use a subset of all bins,
|
117
|
-
i.e., subset of 0:128 for N=256 could be 0:127 or 1:128. energy_t
|
118
|
-
will reflect the total energy over all bins regardless of the feature
|
119
|
-
transform config.
|
130
|
+
Note: feature transforms can be defined to use a subset of all bins,
|
131
|
+
i.e., subset of 0:128 for N=256 could be 0:127 or 1:128. energy_t
|
132
|
+
will reflect the total energy over all bins regardless of the feature
|
133
|
+
transform config.
|
120
134
|
"""
|
121
|
-
import numpy as np
|
122
|
-
|
123
135
|
from sonusai import SonusAIError
|
124
136
|
|
125
137
|
_, target_energy = data.target_fft.execute_all(data.target_audio)
|
sonusai/utils/__init__.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# SonusAI general utilities
|
2
2
|
from .asl_p56 import asl_p56
|
3
|
-
from .asr import ASRData
|
4
3
|
from .asr import ASRResult
|
5
4
|
from .asr import calc_asr
|
5
|
+
from .asr import get_available_engines
|
6
6
|
from .audio_devices import get_default_input_device
|
7
7
|
from .audio_devices import get_input_device_index_by_name
|
8
8
|
from .audio_devices import get_input_devices
|
sonusai/utils/asr.py
CHANGED
@@ -1,22 +1,10 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
|
-
from typing import Any
|
3
2
|
from typing import Callable
|
4
3
|
from typing import Optional
|
5
4
|
|
6
5
|
from sonusai.mixture import AudioT
|
7
6
|
|
8
7
|
|
9
|
-
@dataclass(frozen=True)
|
10
|
-
class ASRData:
|
11
|
-
audio: AudioT
|
12
|
-
whisper_model: Optional[Any] = None
|
13
|
-
whisper_model_name: Optional[str] = None
|
14
|
-
device: Optional[str] = None
|
15
|
-
cpu_threads: Optional[int] = None
|
16
|
-
compute_type: Optional[str] = None
|
17
|
-
beam_size: Optional[int] = None
|
18
|
-
|
19
|
-
|
20
8
|
@dataclass(frozen=True)
|
21
9
|
class ASRResult:
|
22
10
|
text: str
|
@@ -28,57 +16,56 @@ class ASRResult:
|
|
28
16
|
asr_cpu_time: Optional[float] = None
|
29
17
|
|
30
18
|
|
31
|
-
def get_available_engines() ->
|
19
|
+
def get_available_engines() -> list[str]:
|
32
20
|
from importlib import import_module
|
33
21
|
from pkgutil import iter_modules
|
34
22
|
|
35
23
|
module = import_module('sonusai.utils.asr_functions')
|
36
|
-
engines =
|
24
|
+
engines = [method for method in dir(module) if not method.startswith('_')]
|
37
25
|
for _, name, _ in iter_modules():
|
38
26
|
if name.startswith('sonusai_asr_'):
|
39
27
|
module = import_module(f'{name}.asr_functions')
|
40
28
|
for method in dir(module):
|
41
29
|
if not method.startswith('_'):
|
42
|
-
engines
|
30
|
+
engines.append(method)
|
43
31
|
|
44
32
|
return engines
|
45
33
|
|
46
34
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
35
|
+
def _asr_fn(engine: str) -> Callable[..., ASRResult]:
|
36
|
+
from importlib import import_module
|
37
|
+
from pkgutil import iter_modules
|
38
|
+
|
39
|
+
module = import_module('sonusai.utils.asr_functions')
|
40
|
+
for method in dir(module):
|
41
|
+
if method == engine:
|
42
|
+
return getattr(module, method)
|
43
|
+
|
44
|
+
for _, name, _ in iter_modules():
|
45
|
+
if name.startswith('sonusai_asr_'):
|
46
|
+
module = import_module(f'{name}.asr_functions')
|
47
|
+
for method in dir(module):
|
48
|
+
if method == engine:
|
49
|
+
return getattr(module, method)
|
50
|
+
|
51
|
+
raise ValueError(f'engine {engine} not supported')
|
52
|
+
|
53
|
+
|
54
|
+
def calc_asr(audio: AudioT | str, engine: str, **config) -> ASRResult:
|
55
55
|
"""Run ASR on audio
|
56
56
|
|
57
57
|
:param audio: Numpy array of audio samples or location of an audio file
|
58
|
-
:param engine:
|
59
|
-
:param
|
60
|
-
:param whisper_model_name: Name of Whisper ASR model to use if none was provided
|
61
|
-
:param device: the device to put the ASR model into
|
62
|
-
:param cpu_threads: int specifying threads to use when device is cpu
|
63
|
-
note: must be 1 if this func is run in parallel
|
64
|
-
:param compute_type: the precision of ASR model to use
|
65
|
-
:param beam_size: int specifying beam_size to use
|
58
|
+
:param engine: ASR engine to use
|
59
|
+
:param config: kwargs configuration parameters
|
66
60
|
:return: ASRResult object containing text and confidence
|
67
61
|
"""
|
68
62
|
from copy import copy
|
69
63
|
|
70
64
|
import numpy as np
|
71
65
|
|
72
|
-
from sonusai import SonusAIError
|
73
66
|
from sonusai.mixture import read_audio
|
74
67
|
|
75
|
-
available_engines = get_available_engines()
|
76
|
-
if engine not in available_engines:
|
77
|
-
raise SonusAIError(f'Unsupported ASR function: {engine}')
|
78
|
-
|
79
68
|
if not isinstance(audio, np.ndarray):
|
80
69
|
audio = copy(read_audio(audio))
|
81
70
|
|
82
|
-
|
83
|
-
|
84
|
-
return available_engines[engine](data)
|
71
|
+
return _asr_fn(engine)(audio, **config)
|
@@ -1,8 +1,8 @@
|
|
1
|
-
from sonusai.
|
1
|
+
from sonusai.mixture import AudioT
|
2
2
|
from sonusai.utils import ASRResult
|
3
3
|
|
4
4
|
|
5
|
-
def aaware_whisper(
|
5
|
+
def aaware_whisper(audio: AudioT, **_config) -> ASRResult:
|
6
6
|
import tempfile
|
7
7
|
from math import exp
|
8
8
|
from os import getenv
|
@@ -22,7 +22,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:
|
|
22
22
|
|
23
23
|
with tempfile.TemporaryDirectory() as tmp:
|
24
24
|
file = join(tmp, 'asr.wav')
|
25
|
-
write_audio(name=file, audio=float_to_int16(
|
25
|
+
write_audio(name=file, audio=float_to_int16(audio))
|
26
26
|
|
27
27
|
files = {'audio_file': (file, open(file, 'rb'), 'audio/wav')}
|
28
28
|
|
@@ -1,67 +1,70 @@
|
|
1
|
-
sonusai/__init__.py,sha256=
|
1
|
+
sonusai/__init__.py,sha256=s77La_fCT4TJtXR8fT0jHrWgGwOQI5qonyS3LStzsyw,2980
|
2
2
|
sonusai/aawscd_probwrite.py,sha256=GukR5owp_0A3DrqSl9fHWULYgclNft4D5OkHIwfxxkc,3698
|
3
|
-
sonusai/audiofe.py,sha256=
|
4
|
-
sonusai/calc_metric_spenh.py,sha256=
|
3
|
+
sonusai/audiofe.py,sha256=3LssRiL73DH8teihD9f3nCvfZ0a65WQtXCqWGnKHuJM,11157
|
4
|
+
sonusai/calc_metric_spenh.py,sha256=ee2xrx6L1lFyWSoQSiq56He3RQ1cF7T_ak-6TjejXsc,47738
|
5
5
|
sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
sonusai/data/genmixdb.yml,sha256
|
6
|
+
sonusai/data/genmixdb.yml,sha256=p7AhJjtQ4S_xtDhVsFEqUi8fmvNzCLCV1LPdoNYvmWo,1043
|
7
7
|
sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
|
8
8
|
sonusai/data/whitenoise.wav,sha256=I2umov0m34y56F9IsIBi1XtE76ZeZaSKDf70cJRe3pI,1920044
|
9
9
|
sonusai/doc/__init__.py,sha256=rP5Hgn0Iys_xkuv4caxngdqehuU4zLZsiKuv8Nde67M,19
|
10
|
-
sonusai/doc/doc.py,sha256=
|
10
|
+
sonusai/doc/doc.py,sha256=8gftMlCaVa0hIA_BjSHbEYgSTq_BAZwC5Ipk-BFgGp8,22919
|
11
11
|
sonusai/doc.py,sha256=l8CaFgLI8mqx4tn0aXfxKqa2dy9GgC0zjYxZAkpmi1E,878
|
12
12
|
sonusai/genft.py,sha256=OzET3iTE-QhrUckzidfZvCDXZlAxIF5Xe5NEf856Vvk,5662
|
13
|
-
sonusai/genmetrics.py,sha256=
|
13
|
+
sonusai/genmetrics.py,sha256=YS5ZuhOR77-wGH8ju7z_ihQSDhYhV_iJWlWWMemLIGU,4998
|
14
14
|
sonusai/genmix.py,sha256=TU5aTebGHsbfwsRbynYbegGBelSma9khuQkDk0dFE3I,7075
|
15
15
|
sonusai/genmixdb.py,sha256=gF2qas1tH9MHEFLoEMrN3kYVm-vhAKaOuZ8ev-w4MQM,19553
|
16
16
|
sonusai/gentcst.py,sha256=W1ZO3xs7CoZkFcvOTH-FLJOIA4I7Wzb0HVRC3hGGSaM,20223
|
17
|
-
sonusai/lsdb.py,sha256=
|
17
|
+
sonusai/lsdb.py,sha256=oLyGoq46hkravDUXhI-wwhKdX_GESoOD5-E7nEJl7rc,5996
|
18
18
|
sonusai/main.py,sha256=GC-pQrSqx9tWwIcmEo6V9SraEv5KskBLS_W_wz-f2ZM,2509
|
19
|
-
sonusai/metrics/__init__.py,sha256=
|
19
|
+
sonusai/metrics/__init__.py,sha256=GEenHTytN-_qMAF7UJypqkArudJsHyctGSKd0bKUniQ,807
|
20
|
+
sonusai/metrics/calc_audio_stats.py,sha256=u-HIyrqJUCb0G7WR865TcpTVkT2lsCnb2rwB67IgzEc,1308
|
20
21
|
sonusai/metrics/calc_class_weights.py,sha256=dyY7daEIf5Ms5tfTf6wF0fkx_GnMADHOZR_rtsfGoVM,3933
|
21
22
|
sonusai/metrics/calc_optimal_thresholds.py,sha256=9fRfwl-aKAbzHJyqGHv4o8BpZXG9HHB7zUJObHXfYM4,3522
|
22
23
|
sonusai/metrics/calc_pcm.py,sha256=n_QoDwdM09HZx0zMbCkJR07e9fCLo03Ilpi1nqdizRg,1906
|
23
|
-
sonusai/metrics/calc_pesq.py,sha256=
|
24
|
+
sonusai/metrics/calc_pesq.py,sha256=vO3HnQfyzYLy6VILhP5E-Yi6HkI6T9ob_CEfPo6NG50,971
|
25
|
+
sonusai/metrics/calc_phase_distance.py,sha256=Oh7UrH4ekm48pIhOnAOmkSr1OWueq2GUlwJheoAQmnk,1905
|
24
26
|
sonusai/metrics/calc_sa_sdr.py,sha256=pup1uzHbSqV3JprYCXmu_hbWILqRME5KAnFJGe4WXCY,2566
|
25
27
|
sonusai/metrics/calc_sample_weights.py,sha256=0O2EH1-FKlCa0HFgKPUF1BJiknR1hCH7zLbXnoXH7Ag,945
|
26
|
-
sonusai/metrics/
|
27
|
-
sonusai/metrics/
|
28
|
+
sonusai/metrics/calc_snr_f.py,sha256=cLRUYeyRuoDtXWOgrgSzFP3EMOBlRaBcPM1BcxpEU_M,1199
|
29
|
+
sonusai/metrics/calc_speech.py,sha256=i-BjYiSY-M3YSbKFUOW4m7m4frkF4ikyJgyizhDjGm0,14448
|
30
|
+
sonusai/metrics/calc_wer.py,sha256=7VVW_YsbeysKOZOTtvXVAvWi8lb0PZtu-Xg2HiXomWc,2483
|
31
|
+
sonusai/metrics/calc_wsdr.py,sha256=9wiCJsGgRGHzBpHv0zSxorMsN_JZNBPyge280Hd1W0w,2531
|
28
32
|
sonusai/metrics/class_summary.py,sha256=4Mb25nuk6eqotnQSFMuOQL3zofGcpNXDfDlPa513ZLQ,3286
|
29
33
|
sonusai/metrics/confusion_matrix_summary.py,sha256=3qg6TMKjJeHtNjj2YnNjPFSlMrQXt0Zcu1dLkGB_aPU,4001
|
30
34
|
sonusai/metrics/one_hot.py,sha256=QSeH_GdqBpOAKLrNnQ8gjcPC-vSdUqC0yPEQueTA6VI,13548
|
31
35
|
sonusai/metrics/snr_summary.py,sha256=P4U5_Xr7v9F8kF-rZBnpsVNt3p42rIVS6zmch8yfVfg,5575
|
32
|
-
sonusai/mixture/__init__.py,sha256=
|
33
|
-
sonusai/mixture/audio.py,sha256=
|
36
|
+
sonusai/mixture/__init__.py,sha256=5M8wEFLkySuaFUKFLhXGPs9SNt2fLwsr6QqEYi7-eDY,5358
|
37
|
+
sonusai/mixture/audio.py,sha256=2u7nnKdKwXRA6YWVwCurst5D6CQinKZj5jlm9LhXe9c,2445
|
34
38
|
sonusai/mixture/augmentation.py,sha256=Blb90tdTwBOj5w9tRcYyS5H67YJuFiXsGqwZWd7ON4g,10468
|
35
39
|
sonusai/mixture/class_count.py,sha256=_wFnVl2yEOnbor7pLg7cYOUeX6nioov-03Cv3SEbh2k,996
|
36
|
-
sonusai/mixture/config.py,sha256=
|
37
|
-
sonusai/mixture/constants.py,sha256=
|
38
|
-
sonusai/mixture/datatypes.py,sha256=
|
40
|
+
sonusai/mixture/config.py,sha256=Fu6-HkenI_h7BdOTuGX9B-vsbLJD5pvTPd_LAncZgI4,22828
|
41
|
+
sonusai/mixture/constants.py,sha256=90qaRIEcmIoS3Od5h_UP0_SkkvG2aE_eYPv6WsIktC0,1427
|
42
|
+
sonusai/mixture/datatypes.py,sha256=uYNToIngMB4m0pgmaAFj7H27FvrMCiwaAAXDXkvIB6w,9100
|
39
43
|
sonusai/mixture/db_datatypes.py,sha256=GDYbcSrlgUJsesiUUNnR4s5aBkMgviiNSQDaBcgYX7I,1428
|
40
44
|
sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
|
41
45
|
sonusai/mixture/feature.py,sha256=Rwuf82IoXzhHPGbKYVGcatImF_ssBf_FfvbqghVPXtg,4116
|
42
|
-
sonusai/mixture/generation.py,sha256=
|
46
|
+
sonusai/mixture/generation.py,sha256=W3n6ipI-dxg4Wj6YBJn8RTpFqkAyIXzxwObeFbSLq08,42801
|
43
47
|
sonusai/mixture/helpers.py,sha256=eC9ZysEa-83VLKen_9PKWzr8w9dkHj4lp6rMB2fNLbg,24759
|
44
48
|
sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
|
45
|
-
sonusai/mixture/
|
46
|
-
sonusai/mixture/
|
47
|
-
sonusai/mixture/
|
48
|
-
sonusai/mixture/sox_audio.py,sha256=HT3kYA9TP5QPCuoOJdUMnGVN-qY6q96DGL8zxuog76o,12277
|
49
|
+
sonusai/mixture/mixdb.py,sha256=dtEuF2S4wg8G5pl_lkfly5k0kAg_g9xMukVLdb4G8iY,57458
|
50
|
+
sonusai/mixture/soundfile_audio.py,sha256=BwO4lftNvrhoPTJERONcrpxSpM2fjO6kL_e5Ylz742A,4220
|
51
|
+
sonusai/mixture/sox_audio.py,sha256=Pu5RLE8lKKlGOzZsbfcVzbDvy7YPGBuGpbD_PxowFqA,12511
|
49
52
|
sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
|
50
53
|
sonusai/mixture/spectral_mask.py,sha256=8AkCwhy-PSdP1Uri9miKZP-bXFYnFcH_c9xZCGrHavU,2071
|
51
54
|
sonusai/mixture/target_class_balancing.py,sha256=NTNiKZH0_PWLooeow0l41CjJKK8ZTMVbUqz9ZkaNtWk,4900
|
52
55
|
sonusai/mixture/targets.py,sha256=wyy5vhLhuN-hqBMBGoziVvEJg3FKFvJFgmEE7_LaV2M,7908
|
53
56
|
sonusai/mixture/tokenized_shell_vars.py,sha256=zIAFvwP2WSvkMAGY7f3SJ4KLXI6IBT-U_e9ptnoo5Hc,4803
|
54
|
-
sonusai/mixture/torchaudio_audio.py,sha256=
|
57
|
+
sonusai/mixture/torchaudio_audio.py,sha256=V7OUqkiwplQoIIMPBRWBHqsuPHAYYByt5QMWx7cPIJI,3062
|
55
58
|
sonusai/mixture/torchaudio_augmentation.py,sha256=1vEDHI0caL1vrgoY2lAWe4CiHE2jKRuKKH7x23GHw0w,4390
|
56
59
|
sonusai/mixture/truth.py,sha256=Y41pZ52Xkols9LUler0NlgnilUOscBIucmw4GcxXNzU,1612
|
57
60
|
sonusai/mixture/truth_functions/__init__.py,sha256=82lKYHhLy8KW3gHngrocoqwupGVLVsWdIXdYs3vhjOc,359
|
58
61
|
sonusai/mixture/truth_functions/crm.py,sha256=_Vy8UMrOUQXsrM3nutvUMWCpvI8GePr01QFlyqLFd4k,2626
|
59
62
|
sonusai/mixture/truth_functions/data.py,sha256=okFJeOf43NxfdLqWFCBA2pOGqujRlNDYdAcwwR_m8z8,2875
|
60
|
-
sonusai/mixture/truth_functions/energy.py,sha256=
|
63
|
+
sonusai/mixture/truth_functions/energy.py,sha256=xWxjrHM1xPitRFML-X7ruqXMmXVII5MKDjU38WO6bG0,4863
|
61
64
|
sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80BJq59LhcBUM,1539
|
62
65
|
sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
|
63
66
|
sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
|
64
|
-
sonusai/mixture/truth_functions/target.py,sha256=
|
67
|
+
sonusai/mixture/truth_functions/target.py,sha256=gy2OGAV_uIrZunNBRC6kZx3qg48TSzNI_HaXGzPKg3s,5763
|
65
68
|
sonusai/mkwav.py,sha256=zfSyIiQTIK3KV9Ij33jkLhhZIMVYqaROcRQ4S7c4sIo,5364
|
66
69
|
sonusai/onnx_predict.py,sha256=jSxhD2oFyGSTHOGCXbW4fRT-k4SqKOboK2JaDO-yWcs,8737
|
67
70
|
sonusai/plot.py,sha256=ERkmxMM3qjcCDm4LGDQY4fRAncCYAzP7uW8iZ7_brcg,17105
|
@@ -79,11 +82,11 @@ sonusai/speech/vctk.py,sha256=EAMEBAzjZUI6dw15n-yI2oCN-H4tzM9t4aUVlOxpAbo,1540
|
|
79
82
|
sonusai/speech/voxceleb.py,sha256=aJGN0yDb2LFLmCKmRzmUEjpZWQ-QGWw6XWOpy9967AI,2686
|
80
83
|
sonusai/summarize_metric_spenh.py,sha256=OiZe_bhCq5esXNhsOkHDD7g4ssYrpENDHvDVoPzV9iw,1822
|
81
84
|
sonusai/tplot.py,sha256=85T6OPZfxVegHBiSuilFpdgCNMEE0VKAuciNy4rCY5Y,14544
|
82
|
-
sonusai/utils/__init__.py,sha256=
|
85
|
+
sonusai/utils/__init__.py,sha256=UEM7SkO_xxu0VW-u_zI3e8Xih2CX4aqYjqr61LO5EQA,2271
|
83
86
|
sonusai/utils/asl_p56.py,sha256=-bvQpd-jRQVURbkZJpRoyEAq6gTv9Rc3oFDbh5_lcjY,3861
|
84
|
-
sonusai/utils/asr.py,sha256=
|
87
|
+
sonusai/utils/asr.py,sha256=V0-cW56KS6aOSakSVwQIbhkqEkKf5l8T11b8ciNxWDw,2132
|
85
88
|
sonusai/utils/asr_functions/__init__.py,sha256=JyHK67s97bw7QzrlkboWhws4yNytdPatqzLJxfwx-yw,43
|
86
|
-
sonusai/utils/asr_functions/aaware_whisper.py,sha256=
|
89
|
+
sonusai/utils/asr_functions/aaware_whisper.py,sha256=NPqc62wKhshYecu368Yvl2xcImp2a_kV3Q229B2gyOI,1992
|
87
90
|
sonusai/utils/audio_devices.py,sha256=LgaXTln1oRArBzaet3rZiIO2plgtaThuGBc3sJ_sLlo,1414
|
88
91
|
sonusai/utils/braced_glob.py,sha256=Z_XIpPK17QiP1JbzAnUC5w3oyG8ZovoyM22Wh-Q_vWU,1675
|
89
92
|
sonusai/utils/calculate_input_shape.py,sha256=63ILxibYKuTQozY83QN8Y2OOhBEbW_1X47Q0askcHDM,984
|
@@ -116,7 +119,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2z
|
|
116
119
|
sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
|
117
120
|
sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
|
118
121
|
sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
|
119
|
-
sonusai-0.18.
|
120
|
-
sonusai-0.18.
|
121
|
-
sonusai-0.18.
|
122
|
-
sonusai-0.18.
|
122
|
+
sonusai-0.18.4.dist-info/METADATA,sha256=8Q0ANLLD6sjL_HJgqY3rrAEyj6m2emeOgPeraSjnUZ4,2591
|
123
|
+
sonusai-0.18.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
124
|
+
sonusai-0.18.4.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
|
125
|
+
sonusai-0.18.4.dist-info/RECORD,,
|
sonusai/mixture/mapped_snr_f.py
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
|
3
|
-
|
4
|
-
def calculate_snr_f_statistics(truth_f: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
5
|
-
"""Calculate statistics of snr_f truth data.
|
6
|
-
|
7
|
-
For now, includes mean and standard deviation of the raw values (usually energy)
|
8
|
-
and mean and standard deviation of the dB values (10 * log10).
|
9
|
-
"""
|
10
|
-
return (
|
11
|
-
calculate_snr_mean(truth_f),
|
12
|
-
calculate_snr_std(truth_f),
|
13
|
-
calculate_snr_db_mean(truth_f),
|
14
|
-
calculate_snr_db_std(truth_f),
|
15
|
-
)
|
16
|
-
|
17
|
-
|
18
|
-
def calculate_snr_mean(truth_f: np.ndarray) -> np.ndarray:
|
19
|
-
"""Calculate mean of snr_f truth data."""
|
20
|
-
snr_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
|
21
|
-
|
22
|
-
for c in range(truth_f.shape[1]):
|
23
|
-
tmp_truth = truth_f[:, c]
|
24
|
-
tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
|
25
|
-
|
26
|
-
if len(tmp) == 0:
|
27
|
-
snr_mean[c] = -np.inf
|
28
|
-
else:
|
29
|
-
snr_mean[c] = np.mean(tmp)
|
30
|
-
|
31
|
-
return snr_mean
|
32
|
-
|
33
|
-
|
34
|
-
def calculate_snr_std(truth_f: np.ndarray) -> np.ndarray:
|
35
|
-
"""Calculate standard deviation of snr_f truth data."""
|
36
|
-
snr_std = np.zeros(truth_f.shape[1], dtype=np.float32)
|
37
|
-
|
38
|
-
for c in range(truth_f.shape[1]):
|
39
|
-
tmp_truth = truth_f[:, c]
|
40
|
-
tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
|
41
|
-
|
42
|
-
if len(tmp) == 0:
|
43
|
-
snr_std[c] = -np.inf
|
44
|
-
else:
|
45
|
-
snr_std[c] = np.std(tmp, ddof=1)
|
46
|
-
|
47
|
-
return snr_std
|
48
|
-
|
49
|
-
|
50
|
-
def calculate_snr_db_mean(truth_f: np.ndarray) -> np.ndarray:
|
51
|
-
"""Calculate dB mean of snr_f truth data."""
|
52
|
-
snr_db_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
|
53
|
-
|
54
|
-
for c in range(truth_f.shape[1]):
|
55
|
-
tmp_truth = truth_f[:, c]
|
56
|
-
tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
|
57
|
-
|
58
|
-
tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
|
59
|
-
tmp2 = tmp2[np.isfinite(tmp2)]
|
60
|
-
|
61
|
-
if len(tmp2) == 0:
|
62
|
-
snr_db_mean[c] = -np.inf
|
63
|
-
else:
|
64
|
-
snr_db_mean[c] = np.mean(tmp2)
|
65
|
-
|
66
|
-
return snr_db_mean
|
67
|
-
|
68
|
-
|
69
|
-
def calculate_snr_db_std(truth_f: np.ndarray) -> np.ndarray:
|
70
|
-
"""Calculate dB standard deviation of snr_f truth data."""
|
71
|
-
snr_db_std = np.zeros(truth_f.shape[1], dtype=np.float32)
|
72
|
-
|
73
|
-
for c in range(truth_f.shape[1]):
|
74
|
-
tmp_truth = truth_f[:, c]
|
75
|
-
tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
|
76
|
-
|
77
|
-
tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
|
78
|
-
tmp2 = tmp2[np.isfinite(tmp2)]
|
79
|
-
|
80
|
-
if len(tmp2) == 0:
|
81
|
-
snr_db_std[c] = -np.inf
|
82
|
-
else:
|
83
|
-
snr_db_std[c] = np.std(tmp2, ddof=1)
|
84
|
-
|
85
|
-
return snr_db_std
|
86
|
-
|
87
|
-
|
88
|
-
def calculate_mapped_snr_f(truth_f: np.ndarray, snr_db_mean: np.ndarray, snr_db_std: np.ndarray) -> np.ndarray:
|
89
|
-
"""Calculate mapped SNR from standard SNR energy per bin/class."""
|
90
|
-
import scipy.special as sc
|
91
|
-
|
92
|
-
old_err = np.seterr(divide='ignore', invalid='ignore')
|
93
|
-
num = 10 * np.log10(np.double(truth_f)) - np.double(snr_db_mean)
|
94
|
-
den = np.double(snr_db_std) * np.sqrt(2)
|
95
|
-
q = num / den
|
96
|
-
q = np.nan_to_num(q, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
|
97
|
-
mapped_snr_f = 0.5 * (1 + sc.erf(q))
|
98
|
-
np.seterr(**old_err)
|
99
|
-
|
100
|
-
return mapped_snr_f.astype(np.float32)
|
File without changes
|
File without changes
|