sonusai 0.15.9__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +36 -4
- sonusai/audiofe.py +111 -106
- sonusai/calc_metric_spenh.py +38 -22
- sonusai/genft.py +15 -6
- sonusai/genmix.py +14 -6
- sonusai/genmixdb.py +15 -7
- sonusai/gentcst.py +13 -6
- sonusai/lsdb.py +15 -5
- sonusai/main.py +58 -61
- sonusai/mixture/__init__.py +1 -0
- sonusai/mixture/config.py +1 -2
- sonusai/mkmanifest.py +43 -8
- sonusai/mkwav.py +15 -6
- sonusai/onnx_predict.py +16 -6
- sonusai/plot.py +16 -6
- sonusai/post_spenh_targetf.py +13 -6
- sonusai/summarize_metric_spenh.py +71 -0
- sonusai/tplot.py +14 -6
- sonusai/utils/__init__.py +4 -7
- sonusai/utils/asl_p56.py +3 -3
- sonusai/utils/asr.py +35 -8
- sonusai/utils/asr_functions/__init__.py +0 -5
- sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai/utils/{trim_docstring.py → docstring.py} +20 -0
- sonusai/utils/model_utils.py +30 -0
- sonusai/utils/onnx_utils.py +19 -45
- {sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/METADATA +7 -25
- {sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/RECORD +32 -46
- sonusai/data_generator/__init__.py +0 -5
- sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai/keras_onnx.py +0 -86
- sonusai/keras_predict.py +0 -231
- sonusai/keras_train.py +0 -334
- sonusai/torchl_onnx.py +0 -216
- sonusai/torchl_predict.py +0 -542
- sonusai/torchl_train.py +0 -223
- sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai/utils/asr_functions/data.py +0 -16
- sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai/utils/asr_functions/google.py +0 -95
- sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai/utils/keras_utils.py +0 -226
- {sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/WHEEL +0 -0
- {sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/entry_points.txt +0 -0
sonusai/utils/asr.py
CHANGED
@@ -1,10 +1,22 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
2
|
from typing import Any
|
3
|
+
from typing import Callable
|
3
4
|
from typing import Optional
|
4
5
|
|
5
6
|
from sonusai.mixture import AudioT
|
6
7
|
|
7
8
|
|
9
|
+
@dataclass(frozen=True)
class ASRData:
    """Immutable bundle of inputs handed to an ASR engine function.

    calc_asr() builds one instance positionally from its own arguments and
    passes it to the selected engine callable.
    """
    # Audio handed to the engine (calc_asr reads it from file first when it
    # is not already an array).
    audio: AudioT

    # The remaining fields mirror the same-named calc_asr() arguments and all
    # default to None.
    # NOTE(review): presumably only Whisper-style engines consume these --
    # confirm against the individual engine implementations.
    whisper_model: Optional[Any] = None
    whisper_model_name: Optional[str] = None
    device: Optional[str] = None
    cpu_threads: Optional[int] = None
    compute_type: Optional[str] = None
    beam_size: Optional[int] = None
|
18
|
+
|
19
|
+
|
8
20
|
@dataclass(frozen=True)
|
9
21
|
class ASRResult:
|
10
22
|
text: str
|
@@ -16,8 +28,24 @@ class ASRResult:
|
|
16
28
|
asr_cpu_time: Optional[float] = None
|
17
29
|
|
18
30
|
|
31
|
+
def get_available_engines() -> dict[str, Callable[[ASRData], ASRResult]]:
    """Collect every ASR engine function that can currently be imported.

    Engines come from two places:

    * the built-in ``sonusai.utils.asr_functions`` package, and
    * the ``asr_functions`` submodule of any importable top-level module
      whose name starts with ``sonusai_asr_`` (plugin packages).

    Only public (no leading underscore) *callable* attributes are registered.
    Non-callable attributes -- for example the submodule objects that
    ``from .x import y`` leaves behind in a package namespace -- are skipped,
    so every value in the result can actually be invoked.

    :return: Mapping of engine name to engine callable. When two sources
        expose the same name, the one discovered later wins.
    """
    from importlib import import_module
    from pkgutil import iter_modules

    def public_callables(module) -> dict:
        # Keep only names that are public and can actually be called.
        found = {}
        for attr in dir(module):
            if attr.startswith('_'):
                continue
            candidate = getattr(module, attr)
            if callable(candidate):
                found[attr] = candidate
        return found

    engines = public_callables(import_module('sonusai.utils.asr_functions'))

    # iter_modules() with no arguments walks the top-level modules on sys.path
    for _, name, _ in iter_modules():
        if name.startswith('sonusai_asr_'):
            engines.update(public_callables(import_module(f'{name}.asr_functions')))

    return engines
|
45
|
+
|
46
|
+
|
19
47
|
def calc_asr(audio: AudioT | str,
|
20
|
-
engine: Optional[str] = '
|
48
|
+
engine: Optional[str] = 'aaware_whisper',
|
21
49
|
whisper_model: Optional[Any] = None,
|
22
50
|
whisper_model_name: Optional[str] = 'tiny',
|
23
51
|
device: Optional[str] = 'cpu',
|
@@ -43,15 +71,14 @@ def calc_asr(audio: AudioT | str,
|
|
43
71
|
|
44
72
|
from sonusai import SonusAIError
|
45
73
|
from sonusai.mixture import read_audio
|
46
|
-
|
47
|
-
|
74
|
+
|
75
|
+
available_engines = get_available_engines()
|
76
|
+
if engine not in available_engines:
|
77
|
+
raise SonusAIError(f'Unsupported ASR function: {engine}')
|
48
78
|
|
49
79
|
if not isinstance(audio, np.ndarray):
|
50
80
|
audio = copy(read_audio(audio))
|
51
81
|
|
52
|
-
data =
|
82
|
+
data = ASRData(audio, whisper_model, whisper_model_name, device, cpu_threads, compute_type, beam_size)
|
53
83
|
|
54
|
-
|
55
|
-
return getattr(asr_functions, engine)(data)
|
56
|
-
except AttributeError:
|
57
|
-
raise SonusAIError(f'Unsupported ASR function: {engine}')
|
84
|
+
return available_engines[engine](data)
|
@@ -1,8 +1,8 @@
|
|
1
|
+
from sonusai.utils import ASRData
|
1
2
|
from sonusai.utils import ASRResult
|
2
|
-
from sonusai.utils.asr_functions.data import Data
|
3
3
|
|
4
4
|
|
5
|
-
def aaware_whisper(data:
|
5
|
+
def aaware_whisper(data: ASRData) -> ASRResult:
|
6
6
|
import tempfile
|
7
7
|
from math import exp
|
8
8
|
from os import getenv
|
@@ -4,3 +4,4 @@ from .librispeech import collect_librispeech_transcripts
|
|
4
4
|
from .librispeech import get_librispeech_manifest_entry
|
5
5
|
from .vctk_noisy_speech import collect_vctk_noisy_speech_transcripts
|
6
6
|
from .vctk_noisy_speech import get_vctk_noisy_speech_manifest_entry
|
7
|
+
from .mcgill_speech import get_mcgill_speech_manifest_entry
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from sonusai.utils.asr_manifest_functions import PathInfo
|
2
|
+
|
3
|
+
|
4
|
+
def get_mcgill_speech_manifest_entry(entry: PathInfo, transcript_data: list[str]) -> dict:
    """Build one manifest entry for a McGill speech audio file.

    :param entry: Path information for the audio file; ``abs_path`` and
        ``audio_filepath`` are read from it
    :param transcript_data: Lines of the transcript file, indexed by prompt
    :return: Dictionary with 'audio_filepath', 'text', and 'duration' keys
    :raises SonusAIError: If the computed prompt index is past the end of
        transcript_data
    """
    from os.path import basename
    from os.path import splitext
    from subprocess import check_output

    from sonusai import SonusAIError

    name = splitext(entry.abs_path)[0]
    # Pass an argument list (no shell) so that paths containing spaces or
    # shell metacharacters reach soxi as a single, unmodified argument.
    duration = float(check_output(['soxi', '-D', entry.abs_path]))
    # i.e., from MA01_02.wav, get 01_02
    promptname = basename(name)[2:]
    # paragraph num
    pnum = int(promptname[0:2])
    # sentence num within the paragraph
    snum = int(promptname[3:5])
    # NOTE(review): assumes the transcript holds 11 lines per paragraph -- confirm
    idx = 11 * (pnum - 1) + (snum - 1)
    try:
        # remove prompt-id prefix and \n suffix
        text = transcript_data[idx][6:-1]
    except IndexError:
        raise SonusAIError(f'Could not find {promptname}, idx {idx} in transcript data')

    return {
        'audio_filepath': entry.audio_filepath,
        'text': text,
        'duration': duration,
    }
|
@@ -28,3 +28,23 @@ def trim_docstring(docstring: str) -> str:
|
|
28
28
|
|
29
29
|
# Return a single string
|
30
30
|
return '\n'.join(trimmed)
|
31
|
+
|
32
|
+
|
33
|
+
def add_commands_to_docstring(docstring: str, plugin_docstrings: list[str]) -> str:
    """Replace the command list of a usage docstring with the full, sorted list.

    The command section starts right after the line
    'The sonusai commands are:' and runs up to the next empty line, or to the
    end of the docstring when no empty line follows. Whatever is in that
    section is replaced by the non-empty lines of ``sonusai.commands_doc``
    plus those of every plugin docstring, sorted as plain strings.

    :param docstring: Docstring containing the 'The sonusai commands are:' line
    :param plugin_docstrings: Command descriptions contributed by plugins
    :return: Docstring with the command section rewritten
    :raises ValueError: If the 'The sonusai commands are:' line is missing
    """
    import sonusai

    lines = docstring.splitlines()

    start = lines.index('The sonusai commands are:')
    try:
        end = lines.index('', start)
    except ValueError:
        # splitlines() yields no trailing empty element, so a command list
        # that is the last section of the docstring has no terminating blank
        # line; treat the end of the docstring as the end of the section.
        end = len(lines)

    commands = sonusai.commands_doc.splitlines()
    for plugin_docstring in plugin_docstrings:
        commands.extend(plugin_docstring.splitlines())
    # Drop blank lines and order the remaining command descriptions
    commands = sorted(filter(None, commands))

    return '\n'.join(lines[:start + 1] + commands + lines[end:])
|
@@ -0,0 +1,30 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
|
4
|
+
def import_module(name: str) -> Any:
    """Import a Python module adding the module file's directory to the Python system path so that relative package
    imports are found correctly.

    :param name: Path to the module file; the directory part is added to
        sys.path and the base name without extension is imported
    :return: The imported module
    :raises SonusAIError: If the path cannot be processed or the import fails
    """
    import os
    import sys
    # Aliased so the stdlib function is not confused with this wrapper
    from importlib import import_module as _import_module

    from sonusai import SonusAIError

    try:
        # A bare file name has no directory part; fall back to the current directory
        path = os.path.abspath(os.path.dirname(name) or './')
        root = os.path.splitext(os.path.basename(name))[0]
    except Exception as e:
        raise SonusAIError(f'Error: could not find {name}: {e}.') from e

    # Add model file location to system path (once, even on repeated calls)
    if path not in sys.path:
        sys.path.append(path)

    # Kept outside the block above so that an import failure is reported as
    # such instead of being re-wrapped as a "could not find" error.
    try:
        return _import_module(root)
    except Exception as e:
        raise SonusAIError(f'Error: could not import model from {name}: {e}.') from e
|
sonusai/utils/onnx_utils.py
CHANGED
@@ -3,33 +3,15 @@ from dataclasses import dataclass
|
|
3
3
|
from onnxruntime import InferenceSession
|
4
4
|
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
for i in range(len(keras_model.layers)):
|
16
|
-
layer = keras_model.layers[i]
|
17
|
-
if isinstance(layer, GRU):
|
18
|
-
if layer.stateful:
|
19
|
-
stateful_gru_names.append(layer.name)
|
20
|
-
|
21
|
-
for node_index in range(len(onnx_model.graph.node)):
|
22
|
-
node = onnx_model.graph.node[node_index]
|
23
|
-
replace = False
|
24
|
-
if node.op_type == 'GRU':
|
25
|
-
for i in node.input:
|
26
|
-
for n in stateful_gru_names:
|
27
|
-
if n in i:
|
28
|
-
replace = True
|
29
|
-
if node.name in stateful_gru_names or replace:
|
30
|
-
node.op_type = 'SGRU'
|
31
|
-
|
32
|
-
return onnx_model
|
6
|
+
@dataclass(frozen=True)
class SonusAIMetaData:
    """Immutable record of the SonusAI metadata read back from an ONNX model."""
    # Shapes of the model's first input and first output, as reported by the
    # inference session.
    input_shape: list[int]
    output_shape: list[int]
    # NOTE(review): the flags below presumably mirror the is_flattened,
    # has_timestep, has_channel and is_mutex values written by
    # add_sonusai_metadata() -- confirm against get_sonusai_metadata().
    flattened: bool
    timestep: bool
    channel: bool
    mutex: bool
    # Feature type string
    feature: str
|
33
15
|
|
34
16
|
|
35
17
|
def add_sonusai_metadata(model,
|
@@ -38,13 +20,14 @@ def add_sonusai_metadata(model,
|
|
38
20
|
has_channel: bool = False,
|
39
21
|
is_mutex: bool = True,
|
40
22
|
feature: str = ''):
|
41
|
-
"""Add SonusAI metadata to ONNX model.
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
23
|
+
"""Add SonusAI metadata to an ONNX model.
|
24
|
+
|
25
|
+
:param model: ONNX model
|
26
|
+
:param is_flattened: Model feature data is flattened
|
27
|
+
:param has_timestep: Model has timestep dimension
|
28
|
+
:param has_channel: Model has channel dimension
|
29
|
+
:param is_mutex: Model label output is mutually exclusive
|
30
|
+
:param feature: Model feature type
|
48
31
|
"""
|
49
32
|
is_flattened_flag = model.metadata_props.add()
|
50
33
|
is_flattened_flag.key = 'is_flattened'
|
@@ -69,18 +52,9 @@ def add_sonusai_metadata(model,
|
|
69
52
|
return model
|
70
53
|
|
71
54
|
|
72
|
-
@dataclass(frozen=True)
|
73
|
-
class SonusAIMetaData:
|
74
|
-
input_shape: list[int]
|
75
|
-
output_shape: list[int]
|
76
|
-
flattened: bool
|
77
|
-
timestep: bool
|
78
|
-
channel: bool
|
79
|
-
mutex: bool
|
80
|
-
feature: str
|
81
|
-
|
82
|
-
|
83
55
|
def get_sonusai_metadata(model: InferenceSession) -> SonusAIMetaData:
|
56
|
+
"""Get SonusAI metadata from an ONNX model.
|
57
|
+
"""
|
84
58
|
m = model.get_modelmeta().custom_metadata_map
|
85
59
|
return SonusAIMetaData(input_shape=model.get_inputs()[0].shape,
|
86
60
|
output_shape=model.get_outputs()[0].shape,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.15.9
|
3
|
+
Version: 0.16.1
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -15,57 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
18
|
-
Requires-Dist: aixplain (>=0.2.6,<0.3.0)
|
19
|
-
Requires-Dist: bitarray (>=2.9.2,<3.0.0)
|
20
|
-
Requires-Dist: ctranslate2 (==4.1.0)
|
21
18
|
Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
|
22
|
-
Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
|
23
19
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
24
|
-
Requires-Dist: einops (>=0.7.0,<0.8.0)
|
25
|
-
Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
|
26
|
-
Requires-Dist: geomloss (>=0.2.6,<0.3.0)
|
27
20
|
Requires-Dist: h5py (>=3.11.0,<4.0.0)
|
28
|
-
Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
|
29
21
|
Requires-Dist: jiwer (>=3.0.3,<4.0.0)
|
30
|
-
Requires-Dist: keras (>=3.1.1,<4.0.0)
|
31
|
-
Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
|
32
22
|
Requires-Dist: librosa (>=0.10.1,<0.11.0)
|
33
|
-
Requires-Dist: lightning (>=2.2,<2.3)
|
34
23
|
Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
|
35
|
-
Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
|
36
24
|
Requires-Dist: onnx (>=1.14.1,<2.0.0)
|
37
25
|
Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
|
38
26
|
Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
|
39
27
|
Requires-Dist: pandas (>=2.1.1,<3.0.0)
|
40
28
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
41
|
-
Requires-Dist: pyaaware (>=1.5.
|
29
|
+
Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
|
42
30
|
Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
|
43
31
|
Requires-Dist: pydub (>=0.25.1,<0.26.0)
|
44
32
|
Requires-Dist: pystoi (>=0.4.0,<0.5.0)
|
45
|
-
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
46
33
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
47
|
-
Requires-Dist: sacrebleu (>=2.4.2,<3.0.0)
|
48
34
|
Requires-Dist: samplerate (>=0.2.1,<0.3.0)
|
49
35
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0)
|
50
36
|
Requires-Dist: sox (>=1.4.1,<2.0.0)
|
51
|
-
Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
|
52
|
-
Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
|
53
|
-
Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
|
54
37
|
Requires-Dist: torch (>=2.2,<2.3)
|
55
38
|
Requires-Dist: torchaudio (>=2.2,<2.3)
|
56
|
-
Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
|
57
39
|
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|
58
40
|
Description-Content-Type: text/x-rst
|
59
41
|
|
60
|
-
|
42
|
+
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|
61
43
|
|
62
|
-
|
44
|
+
SonusAI includes functions for pre-processing training and validation data and
|
63
45
|
creating performance metrics reports for key types of Keras models:
|
64
46
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
65
47
|
- binary, multiclass single-label, multiclass multi-label, and regression
|
66
48
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
67
49
|
|
68
|
-
|
69
|
-
- Aaware Inc. sonusai
|
70
|
-
- Keras model scripts: User python scripts for
|
50
|
+
SonusAI python functions are used by:
|
51
|
+
- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
|
52
|
+
- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
|
71
53
|
|
@@ -1,27 +1,20 @@
|
|
1
|
-
sonusai/__init__.py,sha256=
|
1
|
+
sonusai/__init__.py,sha256=vzTFfRB-NeO-Sm3puySDJOybk3ND_Oj6w0EejQPmH1U,2978
|
2
2
|
sonusai/aawscd_probwrite.py,sha256=GukR5owp_0A3DrqSl9fHWULYgclNft4D5OkHIwfxxkc,3698
|
3
|
-
sonusai/audiofe.py,sha256=
|
4
|
-
sonusai/calc_metric_spenh.py,sha256=
|
3
|
+
sonusai/audiofe.py,sha256=3IhkQhNt2DfYDe8TxLF5x8NGFwPdOtYSzgE66joTFJg,10516
|
4
|
+
sonusai/calc_metric_spenh.py,sha256=O4D5VeJ68Ko4UVsxAra0J7a6LUBcqFwDsLbtc4vKGAg,61833
|
5
5
|
sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
sonusai/data/genmixdb.yml,sha256=-XSs_hUR6wHJVoTPmSewzXL7u61X-xmHY46lNPatxSE,1025
|
7
7
|
sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
|
8
8
|
sonusai/data/whitenoise.wav,sha256=I2umov0m34y56F9IsIBi1XtE76ZeZaSKDf70cJRe3pI,1920044
|
9
|
-
sonusai/data_generator/__init__.py,sha256=ouCpY5EDV35fKFeKGQfIcU8uE-c3QcuNerTxUA1X5L8,232
|
10
|
-
sonusai/data_generator/dataset_from_mixdb.py,sha256=D14L8BL7a0WgkF8a8eogQ9Hk9ow4_RK3QBGsZ-HDAog,5493
|
11
|
-
sonusai/data_generator/keras_from_mixdb.py,sha256=14r89aX6Dr9ZKsmMRC7HDXbJrPrCZC1liwwLmZUKj0w,6182
|
12
|
-
sonusai/data_generator/torch_from_mixdb.py,sha256=lvEe9DDu_rIaoyhv9PW4UAnAWp5N74L8kRfxUhsh7oo,4279
|
13
9
|
sonusai/doc/__init__.py,sha256=rP5Hgn0Iys_xkuv4caxngdqehuU4zLZsiKuv8Nde67M,19
|
14
10
|
sonusai/doc/doc.py,sha256=3z210v6ZckuOlsGZ3ySQBdlCNmBp2M1ahqhqG_eUN58,22664
|
15
11
|
sonusai/doc.py,sha256=l8CaFgLI8mqx4tn0aXfxKqa2dy9GgC0zjYxZAkpmi1E,878
|
16
|
-
sonusai/genft.py,sha256=
|
17
|
-
sonusai/genmix.py,sha256=
|
18
|
-
sonusai/genmixdb.py,sha256=
|
19
|
-
sonusai/gentcst.py,sha256=
|
20
|
-
sonusai/
|
21
|
-
sonusai/
|
22
|
-
sonusai/keras_train.py,sha256=8_M5vY-CkonPzbOtOF3Vk-wox-42o8fkaOKLjk7Oc2k,13226
|
23
|
-
sonusai/lsdb.py,sha256=TTMQ-0H8fFzUSczt6yjy-9xUjZSdIGQzTVH5Xr6XPSA,5941
|
24
|
-
sonusai/main.py,sha256=KjN0dCI6rWare4wo_ACzTlURW7pvTw03n51pH7EyLAU,3108
|
12
|
+
sonusai/genft.py,sha256=OzET3iTE-QhrUckzidfZvCDXZlAxIF5Xe5NEf856Vvk,5662
|
13
|
+
sonusai/genmix.py,sha256=TU5aTebGHsbfwsRbynYbegGBelSma9khuQkDk0dFE3I,7075
|
14
|
+
sonusai/genmixdb.py,sha256=M67Y_SEysgHfTmHHOdOjxdpuryTMDNgbDteCzR1uLk8,19669
|
15
|
+
sonusai/gentcst.py,sha256=W1ZO3xs7CoZkFcvOTH-FLJOIA4I7Wzb0HVRC3hGGSaM,20223
|
16
|
+
sonusai/lsdb.py,sha256=fMRqPlAu4B-4MsTXX-NaWXYyJ_dAOJlS-LrvQPQQsXg,6028
|
17
|
+
sonusai/main.py,sha256=GC-pQrSqx9tWwIcmEo6V9SraEv5KskBLS_W_wz-f2ZM,2509
|
25
18
|
sonusai/metrics/__init__.py,sha256=56itZW3S1I7ZYvbxPmFIVPAh1AIJZdljByz1uCrHqFE,635
|
26
19
|
sonusai/metrics/calc_class_weights.py,sha256=dyY7daEIf5Ms5tfTf6wF0fkx_GnMADHOZR_rtsfGoVM,3933
|
27
20
|
sonusai/metrics/calc_optimal_thresholds.py,sha256=9fRfwl-aKAbzHJyqGHv4o8BpZXG9HHB7zUJObHXfYM4,3522
|
@@ -35,11 +28,11 @@ sonusai/metrics/class_summary.py,sha256=4Mb25nuk6eqotnQSFMuOQL3zofGcpNXDfDlPa513
|
|
35
28
|
sonusai/metrics/confusion_matrix_summary.py,sha256=3qg6TMKjJeHtNjj2YnNjPFSlMrQXt0Zcu1dLkGB_aPU,4001
|
36
29
|
sonusai/metrics/one_hot.py,sha256=QSeH_GdqBpOAKLrNnQ8gjcPC-vSdUqC0yPEQueTA6VI,13548
|
37
30
|
sonusai/metrics/snr_summary.py,sha256=P4U5_Xr7v9F8kF-rZBnpsVNt3p42rIVS6zmch8yfVfg,5575
|
38
|
-
sonusai/mixture/__init__.py,sha256=
|
31
|
+
sonusai/mixture/__init__.py,sha256=BfSJL91URq8-JDlbtpc5SQoQRWEUXvxKozbuquX4Mok,5326
|
39
32
|
sonusai/mixture/audio.py,sha256=S-ZROf5rVvwv1TCEuwJHz1FfX4oVubb4QhbybUMMqtM,2150
|
40
33
|
sonusai/mixture/augmentation.py,sha256=Blb90tdTwBOj5w9tRcYyS5H67YJuFiXsGqwZWd7ON4g,10468
|
41
34
|
sonusai/mixture/class_count.py,sha256=_wFnVl2yEOnbor7pLg7cYOUeX6nioov-03Cv3SEbh2k,996
|
42
|
-
sonusai/mixture/config.py,sha256=
|
35
|
+
sonusai/mixture/config.py,sha256=d2IzZ1samHWGMpoKzSmUwMyAWWhgmyNoxyO8oiUwbsg,22193
|
43
36
|
sonusai/mixture/constants.py,sha256=xjCskcQi6khqYZDf7j6z1OkeN1C6wE06kBBapcJiNI4,1428
|
44
37
|
sonusai/mixture/datatypes.py,sha256=zaxfOHw8ddt-i8JPYOPnlqWz_EHBEDoO4q2VAqJViHM,8173
|
45
38
|
sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
|
@@ -67,31 +60,24 @@ sonusai/mixture/truth_functions/file.py,sha256=jOJuC_3y9BH6GGOp9eKcbVrHLVRzUA80B
|
|
67
60
|
sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sDjQZdtJKGzt5wA,479
|
68
61
|
sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
|
69
62
|
sonusai/mixture/truth_functions/target.py,sha256=KAsjugDRooOA5BRcHVAbZRgV7l8S5CFg7CZ0XtKZaQ0,5764
|
70
|
-
sonusai/mkmanifest.py,sha256=
|
71
|
-
sonusai/mkwav.py,sha256=
|
72
|
-
sonusai/onnx_predict.py,sha256=
|
73
|
-
sonusai/plot.py,sha256=
|
74
|
-
sonusai/post_spenh_targetf.py,sha256=
|
63
|
+
sonusai/mkmanifest.py,sha256=7lfK7YOdgAEP_Lxrf-YDxZ5iLH9MJuaOltBVpav2M9M,8705
|
64
|
+
sonusai/mkwav.py,sha256=kLfC2ZuF-t8P97nqYw2falTZpymxAeXv0YTJCe6nK10,5356
|
65
|
+
sonusai/onnx_predict.py,sha256=6Sf-3juIhf_CQlZaL0rDAyV0oouhJvPkR1NzstjpI6I,9151
|
66
|
+
sonusai/plot.py,sha256=ERkmxMM3qjcCDm4LGDQY4fRAncCYAzP7uW8iZ7_brcg,17105
|
67
|
+
sonusai/post_spenh_targetf.py,sha256=xOz5T6WZuyTHmfbtILIY9skgH064Wvi2GF2Bo5L3YMU,4998
|
75
68
|
sonusai/queries/__init__.py,sha256=oKY5JeqZ4Cz7DwCwPc1_ydB8bUs6KaMcWFp_w02TjOs,255
|
76
69
|
sonusai/queries/queries.py,sha256=FNMUKnoY_Ya9S5sNhsB8ppwy0B7V55ilbbjhQRv_UN8,7552
|
77
|
-
sonusai/
|
78
|
-
sonusai/
|
79
|
-
sonusai/
|
80
|
-
sonusai/
|
81
|
-
sonusai/utils/
|
82
|
-
sonusai/utils/
|
83
|
-
sonusai/utils/
|
84
|
-
sonusai/utils/
|
85
|
-
sonusai/utils/asr_functions/aaware_whisper.py,sha256=6JnF8-a-39dYk4gVILWRUD5Ou98T6wPe9g4m8c6MF1A,1994
|
86
|
-
sonusai/utils/asr_functions/aixplain_whisper.py,sha256=Fdg3z4HSw1zBu2CQGVU-e8xM3PLTh1Vn4c6fawBOWwQ,1694
|
87
|
-
sonusai/utils/asr_functions/data.py,sha256=FQn-y63aOUooOjgKtkzkpFz5Gm9dwn3JPwcR0gC7fYA,418
|
88
|
-
sonusai/utils/asr_functions/deepgram.py,sha256=sxFRDJn5EjSLv01G2lMoxXFHzXqFnA9ln4VcQWqGP80,5237
|
89
|
-
sonusai/utils/asr_functions/fastwhisper.py,sha256=cyjC3U4UmBZstIvmpuUbNfrcgFFQd1-FLipDaK3_IpU,2856
|
90
|
-
sonusai/utils/asr_functions/google.py,sha256=swzEKdod6EwGUKX8LJYcZPf6hJpuvsp9N6QJi7ip0yQ,3320
|
91
|
-
sonusai/utils/asr_functions/whisper.py,sha256=IuOodbJY1RsiS3o18cVuBcIXn28TCGQz2BCFR1Up08s,1033
|
92
|
-
sonusai/utils/asr_manifest_functions/__init__.py,sha256=Lz12aCGvfngZkLoUxHSqFjHc4wig4ZjQyFbWPWysgrI,310
|
70
|
+
sonusai/summarize_metric_spenh.py,sha256=OiZe_bhCq5esXNhsOkHDD7g4ssYrpENDHvDVoPzV9iw,1822
|
71
|
+
sonusai/tplot.py,sha256=85T6OPZfxVegHBiSuilFpdgCNMEE0VKAuciNy4rCY5Y,14544
|
72
|
+
sonusai/utils/__init__.py,sha256=TCXlcW8W0Up2f5ciSgz3DabvH1MxrrWD0LK6pQTJkeA,2215
|
73
|
+
sonusai/utils/asl_p56.py,sha256=-bvQpd-jRQVURbkZJpRoyEAq6gTv9Rc3oFDbh5_lcjY,3861
|
74
|
+
sonusai/utils/asr.py,sha256=6y6VYJizHpuQ3MgKbEQ4t2gofO-MW6Ez23oAd6d23IE,2920
|
75
|
+
sonusai/utils/asr_functions/__init__.py,sha256=JyHK67s97bw7QzrlkboWhws4yNytdPatqzLJxfwx-yw,43
|
76
|
+
sonusai/utils/asr_functions/aaware_whisper.py,sha256=LzO9CZV0wBWkjmCR2nSWN_AW9UJwriAsC1OYSlfVeT8,1981
|
77
|
+
sonusai/utils/asr_manifest_functions/__init__.py,sha256=V-w4R7SHUyoeDuMR3tS12j6DGhmfTlUibMKgq1c6ga0,370
|
93
78
|
sonusai/utils/asr_manifest_functions/data.py,sha256=mJsaHccBReguOJu9qsshRhL-3GbeyqM0-PXMseFnZbE,151
|
94
79
|
sonusai/utils/asr_manifest_functions/librispeech.py,sha256=HIaytcYmjRUkuR6fCQlv3Jh3IDWSox_A6WFcFFAHN9M,1635
|
80
|
+
sonusai/utils/asr_manifest_functions/mcgill_speech.py,sha256=2uF9qgBwcue9W9dlRo16Kvr3jX5g53AGjW2rwob-Cqk,957
|
95
81
|
sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py,sha256=-69lM0dz18KbU5_-dmSeqDoNNwgJj4UlxgGkNBEi3wM,2169
|
96
82
|
sonusai/utils/audio_devices.py,sha256=LgaXTln1oRArBzaet3rZiIO2plgtaThuGBc3sJ_sLlo,1414
|
97
83
|
sonusai/utils/braced_glob.py,sha256=h4hab7YDbM4CjLg9iSzyHZrkd22IPUOY5zZqHdifkh8,1510
|
@@ -101,16 +87,17 @@ sonusai/utils/create_timestamp.py,sha256=TxoQXWZ3SFdBEHLOv-ujeIsTEJuiFnKOGRy-FQq
|
|
101
87
|
sonusai/utils/create_ts_name.py,sha256=8RLKmgXwuGcbDMGgtTuc0MvGFfA7IOVqfjkE2T18GOo,405
|
102
88
|
sonusai/utils/dataclass_from_dict.py,sha256=vAGnuMjhy0W9bxZ5usrH7mbQsFog3n0__IC4xyJyVUc,390
|
103
89
|
sonusai/utils/db.py,sha256=lI77MJJLs4CTYxhjFUvBom2Kk2imAP34okOeO4irbDc,371
|
90
|
+
sonusai/utils/docstring.py,sha256=JBecAq_a7KSzZ04tan0BlFA9SzhOQqY9A7Ue85kFQdU,1446
|
104
91
|
sonusai/utils/energy_f.py,sha256=AsoGo3TCMnj9Kzqb9HUAYOL6vPGUseu1p-8z4LG-QYo,1462
|
105
92
|
sonusai/utils/engineering_number.py,sha256=6lWRg-XW3FEllh-Zui1WHYcDnsBEY9cqcwcXPIs2-6o,5515
|
106
93
|
sonusai/utils/get_frames_per_batch.py,sha256=xnq4tV7MT74N0H6b5ZsiAezqdXucboCLQw1Np9XpZbs,134
|
107
94
|
sonusai/utils/get_label_names.py,sha256=bfFV_iFbXdtDp70zAz_CQQN9-QxBw_kt8vyAcr3TITM,828
|
108
95
|
sonusai/utils/grouper.py,sha256=qyZ0nj84yOrC-RZsXHC-KJvcUliGktnV8S6-P3PD6_w,203
|
109
96
|
sonusai/utils/human_readable_size.py,sha256=SjYT0fUlpbfCzCXHo6csir-VMwqfs5ogr-fgLCEqFk4,279
|
110
|
-
sonusai/utils/keras_utils.py,sha256=1FJaaQE2yVYQqB6xTdgzjYKV_yvIA_hKob_uLOLf-0Y,8621
|
111
97
|
sonusai/utils/max_text_width.py,sha256=pxiJMwb_zlkNntexgo7S6lAuF7NLLZvFdOCkxdsQJVY,315
|
98
|
+
sonusai/utils/model_utils.py,sha256=lt2KOGJqsinG71W0i3U29UXFO-47GMAlEabsf2um7bA,862
|
112
99
|
sonusai/utils/numeric_conversion.py,sha256=GRO_2Fba8CcxcFY7bEXKOEUEUX6neA-VN__Bxi1ULsE,340
|
113
|
-
sonusai/utils/onnx_utils.py,sha256=
|
100
|
+
sonusai/utils/onnx_utils.py,sha256=BRsHGlcu5L0v_1z83MNy8TAcBeb7tJd_4OBJgOMLen8,2200
|
114
101
|
sonusai/utils/parallel.py,sha256=bxedjCzBv9oxzU7NajRr6mOKmkCWr2P7FWAI0p2p9N8,1981
|
115
102
|
sonusai/utils/print_mixture_details.py,sha256=BzYM4-wHHNa6zxPzBMUJxwKt0gKHmvbwdd7Yp0w15Yk,3017
|
116
103
|
sonusai/utils/ranges.py,sha256=NPBZOVzMb95GTOIxltVO-wSzgcXqZ14wbdV46JDLKrw,1222
|
@@ -120,11 +107,10 @@ sonusai/utils/reshape.py,sha256=E8Eu6grynaeWwVO6peIR0BF22SrVaJSa1Rkl109lq6Y,5997
|
|
120
107
|
sonusai/utils/seconds_to_hms.py,sha256=oxLuZhTJJr9swj-fOSOrZJ5vBNM7_BrOMQhX1pYpiv0,260
|
121
108
|
sonusai/utils/stacked_complex.py,sha256=feLhz3GC1ILxBGMHOj3sJK--sidsXKbfwkalwAVwizc,2950
|
122
109
|
sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2zJ7K1fm_33UzE,7022
|
123
|
-
sonusai/utils/trim_docstring.py,sha256=dSrtiRsEN4wkkvKBp6WDr13RUypfqZzgH_jOBLs1ouY,881
|
124
110
|
sonusai/utils/wave.py,sha256=O4ZXkZ6wjrKGa99wBCdFd8G6bp91MXXDnmGihpaEMh0,856
|
125
111
|
sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
|
126
112
|
sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
|
127
|
-
sonusai-0.
|
128
|
-
sonusai-0.
|
129
|
-
sonusai-0.
|
130
|
-
sonusai-0.
|
113
|
+
sonusai-0.16.1.dist-info/METADATA,sha256=XJz2OIEx2jTaDJXsiXECpUgD7kr6y3jn1dhs-YmFRNM,2443
|
114
|
+
sonusai-0.16.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
115
|
+
sonusai-0.16.1.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
|
116
|
+
sonusai-0.16.1.dist-info/RECORD,,
|
@@ -1,143 +0,0 @@
|
|
1
|
-
import math
|
2
|
-
import warnings
|
3
|
-
from dataclasses import dataclass
|
4
|
-
from typing import Optional
|
5
|
-
|
6
|
-
import numpy as np
|
7
|
-
|
8
|
-
from sonusai.mixture import GeneralizedIDs
|
9
|
-
from sonusai.mixture import MixtureDatabase
|
10
|
-
|
11
|
-
with warnings.catch_warnings():
|
12
|
-
warnings.simplefilter('ignore')
|
13
|
-
from keras.utils import Sequence
|
14
|
-
|
15
|
-
|
16
|
-
class DatasetFromMixtureDatabase(Sequence):
|
17
|
-
"""Generates data for Keras from a SonusAI mixture database
|
18
|
-
"""
|
19
|
-
|
20
|
-
@dataclass(frozen=True)
|
21
|
-
class BatchParams:
|
22
|
-
mixids: list[int]
|
23
|
-
offset: int
|
24
|
-
extra: int
|
25
|
-
padding: int
|
26
|
-
|
27
|
-
def __init__(self,
|
28
|
-
mixdb: MixtureDatabase,
|
29
|
-
mixids: GeneralizedIDs,
|
30
|
-
batch_size: int,
|
31
|
-
timesteps: int,
|
32
|
-
flatten: bool,
|
33
|
-
add1ch: bool,
|
34
|
-
shuffle: bool = False):
|
35
|
-
"""Initialization
|
36
|
-
"""
|
37
|
-
self.mixdb = mixdb
|
38
|
-
self.mixids = self.mixdb.mixids_to_list(mixids)
|
39
|
-
self.batch_size = batch_size
|
40
|
-
self.timesteps = timesteps
|
41
|
-
self.flatten = flatten
|
42
|
-
self.add1ch = add1ch
|
43
|
-
self.shuffle = shuffle
|
44
|
-
self.stride = self.mixdb.fg_stride
|
45
|
-
self.feature_parameters = self.mixdb.feature_parameters
|
46
|
-
self.num_classes = self.mixdb.num_classes
|
47
|
-
self.mixture_frame_segments = None
|
48
|
-
self.batch_frame_segments = None
|
49
|
-
self.total_batches: Optional[int] = None
|
50
|
-
|
51
|
-
self._initialize_mixtures()
|
52
|
-
|
53
|
-
def __len__(self) -> int:
|
54
|
-
"""Denotes the number of batches per epoch
|
55
|
-
"""
|
56
|
-
return self.total_batches
|
57
|
-
|
58
|
-
def __getitem__(self, batch_index: int) -> tuple[np.ndarray, np.ndarray]:
|
59
|
-
"""Get one batch of data
|
60
|
-
"""
|
61
|
-
from sonusai.utils import reshape_inputs
|
62
|
-
|
63
|
-
batch_params = self.batch_params[batch_index]
|
64
|
-
|
65
|
-
result = [self.mixdb.mixture_ft(mixid) for mixid in batch_params.mixids]
|
66
|
-
feature = np.vstack([result[i][0] for i in range(len(result))])
|
67
|
-
truth = np.vstack([result[i][1] for i in range(len(result))])
|
68
|
-
|
69
|
-
pad_shape = list(feature.shape)
|
70
|
-
pad_shape[0] = batch_params.padding
|
71
|
-
feature = np.vstack([feature, np.zeros(pad_shape)])
|
72
|
-
|
73
|
-
pad_shape = list(truth.shape)
|
74
|
-
pad_shape[0] = batch_params.padding
|
75
|
-
truth = np.vstack([truth, np.zeros(pad_shape)])
|
76
|
-
|
77
|
-
if batch_params.extra > 0:
|
78
|
-
feature = feature[batch_params.offset:-batch_params.extra]
|
79
|
-
truth = truth[batch_params.offset:-batch_params.extra]
|
80
|
-
else:
|
81
|
-
feature = feature[batch_params.offset:]
|
82
|
-
truth = truth[batch_params.offset:]
|
83
|
-
|
84
|
-
feature, truth = reshape_inputs(feature=feature,
|
85
|
-
truth=truth,
|
86
|
-
batch_size=self.batch_size,
|
87
|
-
timesteps=self.timesteps,
|
88
|
-
flatten=self.flatten,
|
89
|
-
add1ch=self.add1ch)
|
90
|
-
|
91
|
-
return feature, truth
|
92
|
-
|
93
|
-
def on_epoch_end(self) -> None:
|
94
|
-
"""Modification of dataset between epochs
|
95
|
-
"""
|
96
|
-
import random
|
97
|
-
|
98
|
-
if self.shuffle:
|
99
|
-
random.shuffle(self.mixids)
|
100
|
-
self._initialize_mixtures()
|
101
|
-
|
102
|
-
def _initialize_mixtures(self) -> None:
|
103
|
-
from sonusai.utils import get_frames_per_batch
|
104
|
-
|
105
|
-
frames_per_batch = get_frames_per_batch(self.batch_size, self.timesteps)
|
106
|
-
# Always extend the number of batches to use all available data
|
107
|
-
# The last batch may need padding
|
108
|
-
self.total_batches = math.ceil(self.mixdb.total_feature_frames(self.mixids) / frames_per_batch)
|
109
|
-
|
110
|
-
# Compute mixid, offset, and extra for dataset
|
111
|
-
# offsets and extras are needed because mixtures are not guaranteed to fall on batch boundaries.
|
112
|
-
# When fetching a new index that starts in the middle of a sequence of mixtures, the
|
113
|
-
# previous feature frame offset must be maintained in order to preserve the correct
|
114
|
-
# data sequence. And the extra must be maintained in order to preserve the correct data length.
|
115
|
-
cumulative_frames = 0
|
116
|
-
start_mixture_index = 0
|
117
|
-
offset = 0
|
118
|
-
self.batch_params = []
|
119
|
-
self.file_indices = []
|
120
|
-
total_frames = 0
|
121
|
-
for idx, mixid in enumerate(self.mixids):
|
122
|
-
current_frames = self.mixdb.mixture(mixid).samples // self.mixdb.feature_step_samples
|
123
|
-
self.file_indices.append(slice(total_frames, total_frames + current_frames))
|
124
|
-
total_frames += current_frames
|
125
|
-
cumulative_frames += current_frames
|
126
|
-
while cumulative_frames >= frames_per_batch:
|
127
|
-
extra = cumulative_frames - frames_per_batch
|
128
|
-
mixids = self.mixids[start_mixture_index:idx + 1]
|
129
|
-
self.batch_params.append(self.BatchParams(mixids=mixids, offset=offset, extra=extra, padding=0))
|
130
|
-
if extra == 0:
|
131
|
-
start_mixture_index = idx + 1
|
132
|
-
offset = 0
|
133
|
-
else:
|
134
|
-
start_mixture_index = idx
|
135
|
-
offset = current_frames - extra
|
136
|
-
cumulative_frames = extra
|
137
|
-
|
138
|
-
# If needed, add final batch with padding
|
139
|
-
needed_frames = self.total_batches * frames_per_batch
|
140
|
-
padding = needed_frames - total_frames
|
141
|
-
if padding != 0:
|
142
|
-
mixids = self.mixids[start_mixture_index:]
|
143
|
-
self.batch_params.append(self.BatchParams(mixids=mixids, offset=offset, extra=0, padding=padding))
|