sonusai 0.18.2__py3-none-any.whl → 0.18.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,11 @@
1
+ import numpy as np
2
+
1
3
  from sonusai.mixture.datatypes import Truth
2
4
  from sonusai.mixture.truth_functions.data import Data
3
5
 
4
6
 
5
7
  def _core(data: Data, mapped: bool, snr: bool) -> Truth:
6
- import numpy as np
7
-
8
8
  from sonusai import SonusAIError
9
- from sonusai.mixture import calculate_mapped_snr_f
10
9
  from sonusai.utils import compute_energy_f
11
10
 
12
11
  snr_db_mean = None
@@ -52,7 +51,7 @@ def _core(data: Data, mapped: bool, snr: bool) -> Truth:
52
51
  tmp = np.nan_to_num(tmp, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
53
52
 
54
53
  if mapped:
55
- tmp = calculate_mapped_snr_f(tmp, snr_db_mean, snr_db_std)
54
+ tmp = _calculate_mapped_snr_f(tmp, snr_db_mean, snr_db_std)
56
55
 
57
56
  for index in data.zero_based_indices:
58
57
  data.truth[offset:offset + data.frame_size, index:index + data.target_fft.bins] = tmp
@@ -60,16 +59,31 @@ def _core(data: Data, mapped: bool, snr: bool) -> Truth:
60
59
  return data.truth
61
60
 
62
61
 
62
+ def _calculate_mapped_snr_f(truth_f: np.ndarray, snr_db_mean: np.ndarray, snr_db_std: np.ndarray) -> np.ndarray:
63
+ """Calculate mapped SNR from standard SNR energy per bin/class."""
64
+ import scipy.special as sc
65
+
66
+ old_err = np.seterr(divide='ignore', invalid='ignore')
67
+ num = 10 * np.log10(np.double(truth_f)) - np.double(snr_db_mean)
68
+ den = np.double(snr_db_std) * np.sqrt(2)
69
+ q = num / den
70
+ q = np.nan_to_num(q, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
71
+ result = 0.5 * (1 + sc.erf(q))
72
+ np.seterr(**old_err)
73
+
74
+ return result.astype(np.float32)
75
+
76
+
63
77
  def energy_f(data: Data) -> Truth:
64
78
  """Frequency domain energy truth generation function
65
79
 
66
- Calculates the true energy per bin:
80
+ Calculates the true energy per bin:
67
81
 
68
- Ti^2 + Tr^2
82
+ Ti^2 + Tr^2
69
83
 
70
- where T is the target STFT bin values.
84
+ where T is the target STFT bin values.
71
85
 
72
- Output shape: [:, bins]
86
+ Output shape: [:, bins]
73
87
  """
74
88
  return _core(data=data, mapped=False, snr=False)
75
89
 
@@ -77,13 +91,13 @@ Output shape: [:, bins]
77
91
  def snr_f(data: Data) -> Truth:
78
92
  """Frequency domain SNR truth function documentation
79
93
 
80
- Calculates the true SNR per bin:
94
+ Calculates the true SNR per bin:
81
95
 
82
- (Ti^2 + Tr^2) / (Ni^2 + Nr^2)
96
+ (Ti^2 + Tr^2) / (Ni^2 + Nr^2)
83
97
 
84
- where T is the target and N is the noise STFT bin values.
98
+ where T is the target and N is the noise STFT bin values.
85
99
 
86
- Output shape: [:, bins]
100
+ Output shape: [:, bins]
87
101
  """
88
102
  return _core(data=data, mapped=False, snr=True)
89
103
 
@@ -91,7 +105,7 @@ Output shape: [:, bins]
91
105
  def mapped_snr_f(data: Data) -> Truth:
92
106
  """Frequency domain mapped SNR truth function documentation
93
107
 
94
- Output shape: [:, bins]
108
+ Output shape: [:, bins]
95
109
  """
96
110
  return _core(data=data, mapped=True, snr=True)
97
111
 
@@ -99,27 +113,25 @@ Output shape: [:, bins]
99
113
  def energy_t(data: Data) -> Truth:
100
114
  """Time domain energy truth function documentation
101
115
 
102
- Calculates the true time domain energy of each frame:
116
+ Calculates the true time domain energy of each frame:
103
117
 
104
- For OLS:
105
- sum(x[0:N-1]^2) / N
118
+ For OLS:
119
+ sum(x[0:N-1]^2) / N
106
120
 
107
- For OLA:
108
- sum(x[0:R-1]^2) / R
121
+ For OLA:
122
+ sum(x[0:R-1]^2) / R
109
123
 
110
- where x is the target time domain data,
111
- N is the size of the transform, and
112
- R is the number of new samples in the frame.
124
+ where x is the target time domain data,
125
+ N is the size of the transform, and
126
+ R is the number of new samples in the frame.
113
127
 
114
- Output shape: [:, 1]
128
+ Output shape: [:, 1]
115
129
 
116
- Note: feature transforms can be defined to use a subset of all bins,
117
- i.e., subset of 0:128 for N=256 could be 0:127 or 1:128. energy_t
118
- will reflect the total energy over all bins regardless of the feature
119
- transform config.
130
+ Note: feature transforms can be defined to use a subset of all bins,
131
+ i.e., subset of 0:128 for N=256 could be 0:127 or 1:128. energy_t
132
+ will reflect the total energy over all bins regardless of the feature
133
+ transform config.
120
134
  """
121
- import numpy as np
122
-
123
135
  from sonusai import SonusAIError
124
136
 
125
137
  _, target_energy = data.target_fft.execute_all(data.target_audio)
@@ -16,7 +16,6 @@ forward transform window if defined by the feature.
16
16
  Output shape: [:, num_classes]
17
17
  (target stacked real, imag; or real only for tdac-co)
18
18
  """
19
-
20
19
  from sonusai import SonusAIError
21
20
 
22
21
  if data.config.num_classes != data.feature_parameters:
sonusai/utils/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
1
  # SonusAI general utilities
2
2
  from .asl_p56 import asl_p56
3
- from .asr import ASRData
4
3
  from .asr import ASRResult
5
4
  from .asr import calc_asr
5
+ from .asr import get_available_engines
6
6
  from .audio_devices import get_default_input_device
7
7
  from .audio_devices import get_input_device_index_by_name
8
8
  from .audio_devices import get_input_devices
sonusai/utils/asr.py CHANGED
@@ -1,22 +1,10 @@
1
1
  from dataclasses import dataclass
2
- from typing import Any
3
2
  from typing import Callable
4
3
  from typing import Optional
5
4
 
6
5
  from sonusai.mixture import AudioT
7
6
 
8
7
 
9
- @dataclass(frozen=True)
10
- class ASRData:
11
- audio: AudioT
12
- whisper_model: Optional[Any] = None
13
- whisper_model_name: Optional[str] = None
14
- device: Optional[str] = None
15
- cpu_threads: Optional[int] = None
16
- compute_type: Optional[str] = None
17
- beam_size: Optional[int] = None
18
-
19
-
20
8
  @dataclass(frozen=True)
21
9
  class ASRResult:
22
10
  text: str
@@ -28,57 +16,56 @@ class ASRResult:
28
16
  asr_cpu_time: Optional[float] = None
29
17
 
30
18
 
31
- def get_available_engines() -> dict[str, Callable[[ASRData], ASRResult]]:
19
+ def get_available_engines() -> list[str]:
32
20
  from importlib import import_module
33
21
  from pkgutil import iter_modules
34
22
 
35
23
  module = import_module('sonusai.utils.asr_functions')
36
- engines = {method: getattr(module, method) for method in dir(module) if not method.startswith('_')}
24
+ engines = [method for method in dir(module) if not method.startswith('_')]
37
25
  for _, name, _ in iter_modules():
38
26
  if name.startswith('sonusai_asr_'):
39
27
  module = import_module(f'{name}.asr_functions')
40
28
  for method in dir(module):
41
29
  if not method.startswith('_'):
42
- engines[method] = getattr(module, method)
30
+ engines.append(method)
43
31
 
44
32
  return engines
45
33
 
46
34
 
47
- def calc_asr(audio: AudioT | str,
48
- engine: Optional[str] = 'aaware_whisper',
49
- whisper_model: Optional[Any] = None,
50
- whisper_model_name: Optional[str] = 'tiny',
51
- device: Optional[str] = 'cpu',
52
- cpu_threads: Optional[int] = 1,
53
- compute_type: Optional[str] = 'int8',
54
- beam_size: Optional[int] = 5) -> ASRResult:
35
+ def _asr_fn(engine: str) -> Callable[..., ASRResult]:
36
+ from importlib import import_module
37
+ from pkgutil import iter_modules
38
+
39
+ module = import_module('sonusai.utils.asr_functions')
40
+ for method in dir(module):
41
+ if method == engine:
42
+ return getattr(module, method)
43
+
44
+ for _, name, _ in iter_modules():
45
+ if name.startswith('sonusai_asr_'):
46
+ module = import_module(f'{name}.asr_functions')
47
+ for method in dir(module):
48
+ if method == engine:
49
+ return getattr(module, method)
50
+
51
+ raise ValueError(f'engine {engine} not supported')
52
+
53
+
54
+ def calc_asr(audio: AudioT | str, engine: str, **config) -> ASRResult:
55
55
  """Run ASR on audio
56
56
 
57
57
  :param audio: Numpy array of audio samples or location of an audio file
58
- :param engine: Type of ASR engine to use
59
- :param whisper_model: A preloaded Whisper ASR model
60
- :param whisper_model_name: Name of Whisper ASR model to use if none was provided
61
- :param device: the device to put the ASR model into
62
- :param cpu_threads: int specifying threads to use when device is cpu
63
- note: must be 1 if this func is run in parallel
64
- :param compute_type: the precision of ASR model to use
65
- :param beam_size: int specifying beam_size to use
58
+ :param engine: ASR engine to use
59
+ :param config: kwargs configuration parameters
66
60
  :return: ASRResult object containing text and confidence
67
61
  """
68
62
  from copy import copy
69
63
 
70
64
  import numpy as np
71
65
 
72
- from sonusai import SonusAIError
73
66
  from sonusai.mixture import read_audio
74
67
 
75
- available_engines = get_available_engines()
76
- if engine not in available_engines:
77
- raise SonusAIError(f'Unsupported ASR function: {engine}')
78
-
79
68
  if not isinstance(audio, np.ndarray):
80
69
  audio = copy(read_audio(audio))
81
70
 
82
- data = ASRData(audio, whisper_model, whisper_model_name, device, cpu_threads, compute_type, beam_size)
83
-
84
- return available_engines[engine](data)
71
+ return _asr_fn(engine)(audio, **config)
@@ -1,8 +1,8 @@
1
- from sonusai.utils import ASRData
1
+ from sonusai.mixture import AudioT
2
2
  from sonusai.utils import ASRResult
3
3
 
4
4
 
5
- def aaware_whisper(data: ASRData) -> ASRResult:
5
+ def aaware_whisper(audio: AudioT, **_config) -> ASRResult:
6
6
  import tempfile
7
7
  from math import exp
8
8
  from os import getenv
@@ -22,7 +22,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:
22
22
 
23
23
  with tempfile.TemporaryDirectory() as tmp:
24
24
  file = join(tmp, 'asr.wav')
25
- write_audio(name=file, audio=float_to_int16(data.audio))
25
+ write_audio(name=file, audio=float_to_int16(audio))
26
26
 
27
27
  files = {'audio_file': (file, open(file, 'rb'), 'audio/wav')}
28
28
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.18.2
3
+ Version: 0.18.5
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -1,67 +1,70 @@
1
- sonusai/__init__.py,sha256=j2eH_QUsIIMm0HDiNHC5HCocWsX_GhtvlmTkT7zyYOw,2918
1
+ sonusai/__init__.py,sha256=s77La_fCT4TJtXR8fT0jHrWgGwOQI5qonyS3LStzsyw,2980
2
2
  sonusai/aawscd_probwrite.py,sha256=GukR5owp_0A3DrqSl9fHWULYgclNft4D5OkHIwfxxkc,3698
3
- sonusai/audiofe.py,sha256=zOySiYs5ZZm60eMbA7RjhG6C0Ouhaii3WfL1d0Q8rxg,11154
4
- sonusai/calc_metric_spenh.py,sha256=SunJD8wkdUxyL0rRZt2auauZBEUzpi0IRY8MtXKh3wo,63645
3
+ sonusai/audiofe.py,sha256=3LssRiL73DH8teihD9f3nCvfZ0a65WQtXCqWGnKHuJM,11157
4
+ sonusai/calc_metric_spenh.py,sha256=ee2xrx6L1lFyWSoQSiq56He3RQ1cF7T_ak-6TjejXsc,47738
5
5
  sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- sonusai/data/genmixdb.yml,sha256=-XSs_hUR6wHJVoTPmSewzXL7u61X-xmHY46lNPatxSE,1025
6
+ sonusai/data/genmixdb.yml,sha256=p7AhJjtQ4S_xtDhVsFEqUi8fmvNzCLCV1LPdoNYvmWo,1043
7
7
  sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
8
8
  sonusai/data/whitenoise.wav,sha256=I2umov0m34y56F9IsIBi1XtE76ZeZaSKDf70cJRe3pI,1920044
9
9
  sonusai/doc/__init__.py,sha256=rP5Hgn0Iys_xkuv4caxngdqehuU4zLZsiKuv8Nde67M,19
10
- sonusai/doc/doc.py,sha256=LOf4HiIxMeQ7-n0ExDSldo1Tt1036SVaN_auwlqXUxQ,21489
10
+ sonusai/doc/doc.py,sha256=8gftMlCaVa0hIA_BjSHbEYgSTq_BAZwC5Ipk-BFgGp8,22919
11
11
  sonusai/doc.py,sha256=l8CaFgLI8mqx4tn0aXfxKqa2dy9GgC0zjYxZAkpmi1E,878
12
12
  sonusai/genft.py,sha256=OzET3iTE-QhrUckzidfZvCDXZlAxIF5Xe5NEf856Vvk,5662
13
- sonusai/genmetrics.py,sha256=fC8KPIB6wtBj_hs1X65lq3dqNTcWeuNs1eT7yXdpxD8,1830
13
+ sonusai/genmetrics.py,sha256=YS5ZuhOR77-wGH8ju7z_ihQSDhYhV_iJWlWWMemLIGU,4998
14
14
  sonusai/genmix.py,sha256=TU5aTebGHsbfwsRbynYbegGBelSma9khuQkDk0dFE3I,7075
15
15
  sonusai/genmixdb.py,sha256=gF2qas1tH9MHEFLoEMrN3kYVm-vhAKaOuZ8ev-w4MQM,19553
16
16
  sonusai/gentcst.py,sha256=W1ZO3xs7CoZkFcvOTH-FLJOIA4I7Wzb0HVRC3hGGSaM,20223
17
- sonusai/lsdb.py,sha256=fMRqPlAu4B-4MsTXX-NaWXYyJ_dAOJlS-LrvQPQQsXg,6028
17
+ sonusai/lsdb.py,sha256=oLyGoq46hkravDUXhI-wwhKdX_GESoOD5-E7nEJl7rc,5996
18
18
  sonusai/main.py,sha256=GC-pQrSqx9tWwIcmEo6V9SraEv5KskBLS_W_wz-f2ZM,2509
19
- sonusai/metrics/__init__.py,sha256=56itZW3S1I7ZYvbxPmFIVPAh1AIJZdljByz1uCrHqFE,635
19
+ sonusai/metrics/__init__.py,sha256=GEenHTytN-_qMAF7UJypqkArudJsHyctGSKd0bKUniQ,807
20
+ sonusai/metrics/calc_audio_stats.py,sha256=u-HIyrqJUCb0G7WR865TcpTVkT2lsCnb2rwB67IgzEc,1308
20
21
  sonusai/metrics/calc_class_weights.py,sha256=dyY7daEIf5Ms5tfTf6wF0fkx_GnMADHOZR_rtsfGoVM,3933
21
22
  sonusai/metrics/calc_optimal_thresholds.py,sha256=9fRfwl-aKAbzHJyqGHv4o8BpZXG9HHB7zUJObHXfYM4,3522
22
23
  sonusai/metrics/calc_pcm.py,sha256=n_QoDwdM09HZx0zMbCkJR07e9fCLo03Ilpi1nqdizRg,1906
23
- sonusai/metrics/calc_pesq.py,sha256=bO2QQXvQOrULTpUkNs0MYgk23tWCJqQ6wH1314jHdz0,853
24
+ sonusai/metrics/calc_pesq.py,sha256=vO3HnQfyzYLy6VILhP5E-Yi6HkI6T9ob_CEfPo6NG50,971
25
+ sonusai/metrics/calc_phase_distance.py,sha256=Oh7UrH4ekm48pIhOnAOmkSr1OWueq2GUlwJheoAQmnk,1905
24
26
  sonusai/metrics/calc_sa_sdr.py,sha256=pup1uzHbSqV3JprYCXmu_hbWILqRME5KAnFJGe4WXCY,2566
25
27
  sonusai/metrics/calc_sample_weights.py,sha256=0O2EH1-FKlCa0HFgKPUF1BJiknR1hCH7zLbXnoXH7Ag,945
26
- sonusai/metrics/calc_wer.py,sha256=RP-K6OcTN-7pxBjy9DQzWLBLzIlLVivsgHedtsr3sfY,2499
27
- sonusai/metrics/calc_wsdr.py,sha256=ug1P8DGSuEvb0OKb97S8ZkXefz7CDUGiAhVG_rt1Wwo,5413
28
+ sonusai/metrics/calc_snr_f.py,sha256=cLRUYeyRuoDtXWOgrgSzFP3EMOBlRaBcPM1BcxpEU_M,1199
29
+ sonusai/metrics/calc_speech.py,sha256=i-BjYiSY-M3YSbKFUOW4m7m4frkF4ikyJgyizhDjGm0,14448
30
+ sonusai/metrics/calc_wer.py,sha256=7VVW_YsbeysKOZOTtvXVAvWi8lb0PZtu-Xg2HiXomWc,2483
31
+ sonusai/metrics/calc_wsdr.py,sha256=9wiCJsGgRGHzBpHv0zSxorMsN_JZNBPyge280Hd1W0w,2531
28
32
  sonusai/metrics/class_summary.py,sha256=4Mb25nuk6eqotnQSFMuOQL3zofGcpNXDfDlPa513ZLQ,3286
29
33
  sonusai/metrics/confusion_matrix_summary.py,sha256=3qg6TMKjJeHtNjj2YnNjPFSlMrQXt0Zcu1dLkGB_aPU,4001
30
34
  sonusai/metrics/one_hot.py,sha256=QSeH_GdqBpOAKLrNnQ8gjcPC-vSdUqC0yPEQueTA6VI,13548
31
35
  sonusai/metrics/snr_summary.py,sha256=P4U5_Xr7v9F8kF-rZBnpsVNt3p42rIVS6zmch8yfVfg,5575
32
- sonusai/mixture/__init__.py,sha256=rFaHyroCf0Fd-SuxmH4dl8xZVjOe8gFjndouv9RtzIE,5347
33
- sonusai/mixture/audio.py,sha256=2lqy0DtTMTYhX4aAOIvVtLNn6QB5ivTa7cJIaAlbfAg,2385
36
+ sonusai/mixture/__init__.py,sha256=5M8wEFLkySuaFUKFLhXGPs9SNt2fLwsr6QqEYi7-eDY,5358
37
+ sonusai/mixture/audio.py,sha256=2u7nnKdKwXRA6YWVwCurst5D6CQinKZj5jlm9LhXe9c,2445
34
38
  sonusai/mixture/augmentation.py,sha256=Blb90tdTwBOj5w9tRcYyS5H67YJuFiXsGqwZWd7ON4g,10468
35
39
  sonusai/mixture/class_count.py,sha256=_wFnVl2yEOnbor7pLg7cYOUeX6nioov-03Cv3SEbh2k,996
36
- sonusai/mixture/config.py,sha256=d2IzZ1samHWGMpoKzSmUwMyAWWhgmyNoxyO8oiUwbsg,22193
37
- sonusai/mixture/constants.py,sha256=ZRM7Z8e6EwnL9RwaMVxks-QokN9KMWxnQzAf9VNxa9M,1408
38
- sonusai/mixture/datatypes.py,sha256=uVJtT2sVGS531pSglhaLLF5hZcI3_0oKQOWmMkrCwqo,8253
40
+ sonusai/mixture/config.py,sha256=Fu6-HkenI_h7BdOTuGX9B-vsbLJD5pvTPd_LAncZgI4,22828
41
+ sonusai/mixture/constants.py,sha256=90qaRIEcmIoS3Od5h_UP0_SkkvG2aE_eYPv6WsIktC0,1427
42
+ sonusai/mixture/datatypes.py,sha256=uYNToIngMB4m0pgmaAFj7H27FvrMCiwaAAXDXkvIB6w,9100
39
43
  sonusai/mixture/db_datatypes.py,sha256=GDYbcSrlgUJsesiUUNnR4s5aBkMgviiNSQDaBcgYX7I,1428
40
44
  sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
41
45
  sonusai/mixture/feature.py,sha256=Rwuf82IoXzhHPGbKYVGcatImF_ssBf_FfvbqghVPXtg,4116
42
- sonusai/mixture/generation.py,sha256=ohZnhtHIrdQDql2OF703NnhK07Ys-1qAjiwrIql-oMw,42694
46
+ sonusai/mixture/generation.py,sha256=W3n6ipI-dxg4Wj6YBJn8RTpFqkAyIXzxwObeFbSLq08,42801
43
47
  sonusai/mixture/helpers.py,sha256=eC9ZysEa-83VLKen_9PKWzr8w9dkHj4lp6rMB2fNLbg,24759
44
48
  sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
45
- sonusai/mixture/mapped_snr_f.py,sha256=Fdf2uw62FvyKvVy5VywaUtPZGO1zCWQsHlte0bwkKPQ,3121
46
- sonusai/mixture/mixdb.py,sha256=s40-NToxNXz3UtiVZW9chHIV-tpqT6u-GbFPg8LfQoc,51644
47
- sonusai/mixture/soundfile_audio.py,sha256=mHa5SIXsu_uE0j3DO52GydRJrvWSzU_nII-7YJfQ6Qo,4154
48
- sonusai/mixture/sox_audio.py,sha256=HT3kYA9TP5QPCuoOJdUMnGVN-qY6q96DGL8zxuog76o,12277
49
+ sonusai/mixture/mixdb.py,sha256=ItnjyLg0bVOnuQqQQWVOUSF3AHs95bvBLXjFFi6zQEc,57490
50
+ sonusai/mixture/soundfile_audio.py,sha256=BwO4lftNvrhoPTJERONcrpxSpM2fjO6kL_e5Ylz742A,4220
51
+ sonusai/mixture/sox_audio.py,sha256=Pu5RLE8lKKlGOzZsbfcVzbDvy7YPGBuGpbD_PxowFqA,12511
49
52
  sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
50
53
  sonusai/mixture/spectral_mask.py,sha256=8AkCwhy-PSdP1Uri9miKZP-bXFYnFcH_c9xZCGrHavU,2071
51
54
  sonusai/mixture/target_class_balancing.py,sha256=NTNiKZH0_PWLooeow0l41CjJKK8ZTMVbUqz9ZkaNtWk,4900
52
55
  sonusai/mixture/targets.py,sha256=wyy5vhLhuN-hqBMBGoziVvEJg3FKFvJFgmEE7_LaV2M,7908
53
56
  sonusai/mixture/tokenized_shell_vars.py,sha256=zIAFvwP2WSvkMAGY7f3SJ4KLXI6IBT-U_e9ptnoo5Hc,4803
54
- sonusai/mixture/torchaudio_audio.py,sha256=KhHeOMsjmbwOaAcoKD61aFvYBYSlA8OysfT5iGn45MA,3010
57
+ sonusai/mixture/torchaudio_audio.py,sha256=V7OUqkiwplQoIIMPBRWBHqsuPHAYYByt5QMWx7cPIJI,3062
55
58
  sonusai/mixture/torchaudio_augmentation.py,sha256=1vEDHI0caL1vrgoY2lAWe4CiHE2jKRuKKH7x23GHw0w,4390
56
59
  sonusai/mixture/truth.py,sha256=Y41pZ52Xkols9LUler0NlgnilUOscBIucmw4GcxXNzU,1612
57
60
  sonusai/mixture/truth_functions/__init__.py,sha256=82lKYHhLy8KW3gHngrocoqwupGVLVsWdIXdYs3vhjOc,359
58
61
  sonusai/mixture/truth_functions/crm.py,sha256=_Vy8UMrOUQXsrM3nutvUMWCpvI8GePr01QFlyqLFd4k,2626
59
62
  sonusai/mixture/truth_functions/data.py,sha256=okFJeOf43NxfdLqWFCBA2pOGqujRlNDYdAcwwR_m8z8,2875
60
- sonusai/mixture/truth_functions/energy.py,sha256=ydMtMLjMloG76DB30ZHQ5tkBVh4dkMJ82XEhKBokmIk,4281
63
+ sonusai/mixture/truth_functions/energy.py,sha256=xWxjrHM1xPitRFML-X7ruqXMmXVII5MKDjU38WO6bG0,4863
61
64
  sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80BJq59LhcBUM,1539
62
65
  sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
63
66
  sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
64
- sonusai/mixture/truth_functions/target.py,sha256=KAsjugDRooOA5BRcHVAbZRgV7l8S5CFg7CZ0XtKZaQ0,5764
67
+ sonusai/mixture/truth_functions/target.py,sha256=gy2OGAV_uIrZunNBRC6kZx3qg48TSzNI_HaXGzPKg3s,5763
65
68
  sonusai/mkwav.py,sha256=zfSyIiQTIK3KV9Ij33jkLhhZIMVYqaROcRQ4S7c4sIo,5364
66
69
  sonusai/onnx_predict.py,sha256=jSxhD2oFyGSTHOGCXbW4fRT-k4SqKOboK2JaDO-yWcs,8737
67
70
  sonusai/plot.py,sha256=ERkmxMM3qjcCDm4LGDQY4fRAncCYAzP7uW8iZ7_brcg,17105
@@ -79,11 +82,11 @@ sonusai/speech/vctk.py,sha256=EAMEBAzjZUI6dw15n-yI2oCN-H4tzM9t4aUVlOxpAbo,1540
79
82
  sonusai/speech/voxceleb.py,sha256=aJGN0yDb2LFLmCKmRzmUEjpZWQ-QGWw6XWOpy9967AI,2686
80
83
  sonusai/summarize_metric_spenh.py,sha256=OiZe_bhCq5esXNhsOkHDD7g4ssYrpENDHvDVoPzV9iw,1822
81
84
  sonusai/tplot.py,sha256=85T6OPZfxVegHBiSuilFpdgCNMEE0VKAuciNy4rCY5Y,14544
82
- sonusai/utils/__init__.py,sha256=h7QrOyEBMUMoIBFKZpNwDG8Jg-1uw3bs-qflB3CXxhU,2257
85
+ sonusai/utils/__init__.py,sha256=UEM7SkO_xxu0VW-u_zI3e8Xih2CX4aqYjqr61LO5EQA,2271
83
86
  sonusai/utils/asl_p56.py,sha256=-bvQpd-jRQVURbkZJpRoyEAq6gTv9Rc3oFDbh5_lcjY,3861
84
- sonusai/utils/asr.py,sha256=6y6VYJizHpuQ3MgKbEQ4t2gofO-MW6Ez23oAd6d23IE,2920
87
+ sonusai/utils/asr.py,sha256=V0-cW56KS6aOSakSVwQIbhkqEkKf5l8T11b8ciNxWDw,2132
85
88
  sonusai/utils/asr_functions/__init__.py,sha256=JyHK67s97bw7QzrlkboWhws4yNytdPatqzLJxfwx-yw,43
86
- sonusai/utils/asr_functions/aaware_whisper.py,sha256=Ew3zb8OfbxEW7q1s-KA7D5eph4SjVSUAJgiLK-vVqhI,1985
89
+ sonusai/utils/asr_functions/aaware_whisper.py,sha256=NPqc62wKhshYecu368Yvl2xcImp2a_kV3Q229B2gyOI,1992
87
90
  sonusai/utils/audio_devices.py,sha256=LgaXTln1oRArBzaet3rZiIO2plgtaThuGBc3sJ_sLlo,1414
88
91
  sonusai/utils/braced_glob.py,sha256=Z_XIpPK17QiP1JbzAnUC5w3oyG8ZovoyM22Wh-Q_vWU,1675
89
92
  sonusai/utils/calculate_input_shape.py,sha256=63ILxibYKuTQozY83QN8Y2OOhBEbW_1X47Q0askcHDM,984
@@ -116,7 +119,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2z
116
119
  sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
117
120
  sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
118
121
  sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
119
- sonusai-0.18.2.dist-info/METADATA,sha256=PNDYtM4HDRWpp3GBcuos7jymdaKRoXOT7DlLn-fs8XE,2591
120
- sonusai-0.18.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
121
- sonusai-0.18.2.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
122
- sonusai-0.18.2.dist-info/RECORD,,
122
+ sonusai-0.18.5.dist-info/METADATA,sha256=clPOU1dSbKMvviVaA2NxTWHXGf346J8t1EeQ5jOXVIU,2591
123
+ sonusai-0.18.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
124
+ sonusai-0.18.5.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
125
+ sonusai-0.18.5.dist-info/RECORD,,
@@ -1,100 +0,0 @@
1
- import numpy as np
2
-
3
-
4
- def calculate_snr_f_statistics(truth_f: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
5
- """Calculate statistics of snr_f truth data.
6
-
7
- For now, includes mean and standard deviation of the raw values (usually energy)
8
- and mean and standard deviation of the dB values (10 * log10).
9
- """
10
- return (
11
- calculate_snr_mean(truth_f),
12
- calculate_snr_std(truth_f),
13
- calculate_snr_db_mean(truth_f),
14
- calculate_snr_db_std(truth_f),
15
- )
16
-
17
-
18
- def calculate_snr_mean(truth_f: np.ndarray) -> np.ndarray:
19
- """Calculate mean of snr_f truth data."""
20
- snr_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
21
-
22
- for c in range(truth_f.shape[1]):
23
- tmp_truth = truth_f[:, c]
24
- tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
25
-
26
- if len(tmp) == 0:
27
- snr_mean[c] = -np.inf
28
- else:
29
- snr_mean[c] = np.mean(tmp)
30
-
31
- return snr_mean
32
-
33
-
34
- def calculate_snr_std(truth_f: np.ndarray) -> np.ndarray:
35
- """Calculate standard deviation of snr_f truth data."""
36
- snr_std = np.zeros(truth_f.shape[1], dtype=np.float32)
37
-
38
- for c in range(truth_f.shape[1]):
39
- tmp_truth = truth_f[:, c]
40
- tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
41
-
42
- if len(tmp) == 0:
43
- snr_std[c] = -np.inf
44
- else:
45
- snr_std[c] = np.std(tmp, ddof=1)
46
-
47
- return snr_std
48
-
49
-
50
- def calculate_snr_db_mean(truth_f: np.ndarray) -> np.ndarray:
51
- """Calculate dB mean of snr_f truth data."""
52
- snr_db_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
53
-
54
- for c in range(truth_f.shape[1]):
55
- tmp_truth = truth_f[:, c]
56
- tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
57
-
58
- tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
59
- tmp2 = tmp2[np.isfinite(tmp2)]
60
-
61
- if len(tmp2) == 0:
62
- snr_db_mean[c] = -np.inf
63
- else:
64
- snr_db_mean[c] = np.mean(tmp2)
65
-
66
- return snr_db_mean
67
-
68
-
69
- def calculate_snr_db_std(truth_f: np.ndarray) -> np.ndarray:
70
- """Calculate dB standard deviation of snr_f truth data."""
71
- snr_db_std = np.zeros(truth_f.shape[1], dtype=np.float32)
72
-
73
- for c in range(truth_f.shape[1]):
74
- tmp_truth = truth_f[:, c]
75
- tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
76
-
77
- tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
78
- tmp2 = tmp2[np.isfinite(tmp2)]
79
-
80
- if len(tmp2) == 0:
81
- snr_db_std[c] = -np.inf
82
- else:
83
- snr_db_std[c] = np.std(tmp2, ddof=1)
84
-
85
- return snr_db_std
86
-
87
-
88
- def calculate_mapped_snr_f(truth_f: np.ndarray, snr_db_mean: np.ndarray, snr_db_std: np.ndarray) -> np.ndarray:
89
- """Calculate mapped SNR from standard SNR energy per bin/class."""
90
- import scipy.special as sc
91
-
92
- old_err = np.seterr(divide='ignore', invalid='ignore')
93
- num = 10 * np.log10(np.double(truth_f)) - np.double(snr_db_mean)
94
- den = np.double(snr_db_std) * np.sqrt(2)
95
- q = num / den
96
- q = np.nan_to_num(q, nan=-np.inf, posinf=np.inf, neginf=-np.inf)
97
- mapped_snr_f = 0.5 * (1 + sc.erf(q))
98
- np.seterr(**old_err)
99
-
100
- return mapped_snr_f.astype(np.float32)