sonusai 0.15.8__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. sonusai/__init__.py +35 -4
  2. sonusai/audiofe.py +237 -0
  3. sonusai/calc_metric_spenh.py +21 -12
  4. sonusai/genft.py +2 -1
  5. sonusai/genmixdb.py +5 -5
  6. sonusai/lsdb.py +2 -2
  7. sonusai/main.py +58 -61
  8. sonusai/mixture/__init__.py +4 -2
  9. sonusai/mixture/audio.py +0 -34
  10. sonusai/mixture/config.py +1 -2
  11. sonusai/mixture/datatypes.py +1 -1
  12. sonusai/mixture/feature.py +75 -21
  13. sonusai/mixture/helpers.py +60 -30
  14. sonusai/mixture/log_duration_and_sizes.py +2 -2
  15. sonusai/mixture/mixdb.py +13 -10
  16. sonusai/mixture/spectral_mask.py +14 -14
  17. sonusai/mixture/truth_functions/data.py +1 -1
  18. sonusai/mixture/truth_functions/target.py +2 -2
  19. sonusai/mkmanifest.py +29 -2
  20. sonusai/onnx_predict.py +1 -1
  21. sonusai/plot.py +4 -4
  22. sonusai/post_spenh_targetf.py +8 -8
  23. sonusai/utils/__init__.py +8 -7
  24. sonusai/utils/asl_p56.py +3 -3
  25. sonusai/utils/asr.py +35 -8
  26. sonusai/utils/asr_functions/__init__.py +0 -5
  27. sonusai/utils/asr_functions/aaware_whisper.py +2 -2
  28. sonusai/utils/asr_manifest_functions/__init__.py +1 -0
  29. sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
  30. sonusai/utils/audio_devices.py +41 -0
  31. sonusai/utils/calculate_input_shape.py +3 -4
  32. sonusai/utils/create_timestamp.py +5 -0
  33. sonusai/utils/{trim_docstring.py → docstring.py} +20 -0
  34. sonusai/utils/model_utils.py +30 -0
  35. sonusai/utils/onnx_utils.py +19 -45
  36. sonusai/utils/reshape.py +11 -11
  37. sonusai/utils/wave.py +12 -5
  38. {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/METADATA +8 -19
  39. {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/RECORD +41 -54
  40. {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/WHEEL +1 -1
  41. sonusai/data_generator/__init__.py +0 -5
  42. sonusai/data_generator/dataset_from_mixdb.py +0 -143
  43. sonusai/data_generator/keras_from_mixdb.py +0 -169
  44. sonusai/data_generator/torch_from_mixdb.py +0 -122
  45. sonusai/evaluate.py +0 -245
  46. sonusai/keras_onnx.py +0 -86
  47. sonusai/keras_predict.py +0 -231
  48. sonusai/keras_train.py +0 -334
  49. sonusai/torchl_onnx.py +0 -216
  50. sonusai/torchl_predict.py +0 -547
  51. sonusai/torchl_train.py +0 -223
  52. sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
  53. sonusai/utils/asr_functions/data.py +0 -16
  54. sonusai/utils/asr_functions/deepgram.py +0 -97
  55. sonusai/utils/asr_functions/fastwhisper.py +0 -90
  56. sonusai/utils/asr_functions/google.py +0 -95
  57. sonusai/utils/asr_functions/whisper.py +0 -49
  58. sonusai/utils/keras_utils.py +0 -226
  59. {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/entry_points.txt +0 -0
sonusai/utils/asl_p56.py CHANGED
@@ -22,7 +22,7 @@ def asl_p56(audio: AudioT) -> float:
     # Hangover time in seconds
     H = 0.2
     # Rounded up to next integer
-    I = np.ceil(H * SAMPLE_RATE)
+    H_samples = np.ceil(H * SAMPLE_RATE)
 
     # Margin in dB, difference between threshold and active speech level
     M = 15.9
@@ -40,7 +40,7 @@ def asl_p56(audio: AudioT) -> float:
     a = np.full(thresh_num, -1)
 
     # Hangover counter for each threshold
-    h = np.full(thresh_num, I)
+    h = np.full(thresh_num, H_samples)
 
     # Long-term level square energy of audio
     sq = sum(np.square(audio))
@@ -55,7 +55,7 @@ def asl_p56(audio: AudioT) -> float:
             if q[k] >= c[j]:
                 a[j] = a[j] + 1
                 h[j] = 0
-            elif h[j] < I:
+            elif h[j] < H_samples:
                 a[j] = a[j] + 1
                 h[j] = h[j] + 1
             else:
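
Note: the rename replaces the ambiguous single-letter constant `I` with `H_samples`, making clear it is the hangover time converted to samples. A quick sanity check, assuming a 16 kHz `SAMPLE_RATE` (the actual constant is defined in `sonusai.mixture`):

    import numpy as np

    SAMPLE_RATE = 16000  # assumed here; SonusAI defines its own constant
    H = 0.2              # hangover time in seconds, per ITU-T P.56

    # Hangover in samples, rounded up to the next integer
    H_samples = np.ceil(H * SAMPLE_RATE)
    assert H_samples == 3200
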
sonusai/utils/asr.py CHANGED
@@ -1,10 +1,22 @@
 from dataclasses import dataclass
 from typing import Any
+from typing import Callable
 from typing import Optional
 
 from sonusai.mixture import AudioT
 
 
+@dataclass(frozen=True)
+class ASRData:
+    audio: AudioT
+    whisper_model: Optional[Any] = None
+    whisper_model_name: Optional[str] = None
+    device: Optional[str] = None
+    cpu_threads: Optional[int] = None
+    compute_type: Optional[str] = None
+    beam_size: Optional[int] = None
+
+
 @dataclass(frozen=True)
 class ASRResult:
     text: str
@@ -16,8 +28,24 @@ class ASRResult:
     asr_cpu_time: Optional[float] = None
 
 
+def get_available_engines() -> dict[str, Callable[[ASRData], ASRResult]]:
+    from importlib import import_module
+    from pkgutil import iter_modules
+
+    module = import_module('sonusai.utils.asr_functions')
+    engines = {method: getattr(module, method) for method in dir(module) if not method.startswith('_')}
+    for _, name, _ in iter_modules():
+        if name.startswith('sonusai_asr_'):
+            module = import_module(f'{name}.asr_functions')
+            for method in dir(module):
+                if not method.startswith('_'):
+                    engines[method] = getattr(module, method)
+
+    return engines
+
+
 def calc_asr(audio: AudioT | str,
-             engine: Optional[str] = 'deepgram',
+             engine: Optional[str] = 'aaware_whisper',
              whisper_model: Optional[Any] = None,
              whisper_model_name: Optional[str] = 'tiny',
              device: Optional[str] = 'cpu',
@@ -43,15 +71,14 @@ def calc_asr(audio: AudioT | str,
 
     from sonusai import SonusAIError
     from sonusai.mixture import read_audio
-    from sonusai.utils import asr_functions
-    from sonusai.utils.asr_functions.data import Data
+
+    available_engines = get_available_engines()
+    if engine not in available_engines:
+        raise SonusAIError(f'Unsupported ASR function: {engine}')
 
     if not isinstance(audio, np.ndarray):
         audio = copy(read_audio(audio))
 
-    data = Data(audio, whisper_model, whisper_model_name, device, cpu_threads, compute_type, beam_size)
+    data = ASRData(audio, whisper_model, whisper_model_name, device, cpu_threads, compute_type, beam_size)
 
-    try:
-        return getattr(asr_functions, engine)(data)
-    except AttributeError:
-        raise SonusAIError(f'Unsupported ASR function: {engine}')
+    return available_engines[engine](data)
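
Note: with this change the hard-coded engine list is gone. `get_available_engines()` collects every public callable from `sonusai.utils.asr_functions` plus any installed package whose name starts with `sonusai_asr_`, and `calc_asr` now defaults to the bundled `aaware_whisper` engine. A minimal sketch of a third-party engine plugin under the layout implied above (the package and engine names here are hypothetical):

    # sonusai_asr_example/asr_functions/__init__.py
    from sonusai.utils import ASRData
    from sonusai.utils import ASRResult


    def example_engine(data: ASRData) -> ASRResult:
        # Any public callable exported from <plugin>.asr_functions is
        # registered under its own name and selectable via
        # calc_asr(audio, engine='example_engine').
        return ASRResult(text='recognized text goes here')
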
sonusai/utils/asr_functions/__init__.py CHANGED
@@ -1,6 +1 @@
 from .aaware_whisper import aaware_whisper
-from .aixplain_whisper import aixplain_whisper
-from .deepgram import deepgram
-from .fastwhisper import fastwhisper
-from .google import google
-from .whisper import whisper
sonusai/utils/asr_functions/aaware_whisper.py CHANGED
@@ -1,8 +1,8 @@
+from sonusai.utils import ASRData
 from sonusai.utils import ASRResult
-from sonusai.utils.asr_functions.data import Data
 
 
-def aaware_whisper(data: Data) -> ASRResult:
+def aaware_whisper(data: ASRData) -> ASRResult:
     import tempfile
     from math import exp
     from os import getenv
sonusai/utils/asr_manifest_functions/__init__.py CHANGED
@@ -4,3 +4,4 @@ from .librispeech import collect_librispeech_transcripts
 from .librispeech import get_librispeech_manifest_entry
 from .vctk_noisy_speech import collect_vctk_noisy_speech_transcripts
 from .vctk_noisy_speech import get_vctk_noisy_speech_manifest_entry
+from .mcgill_speech import get_mcgill_speech_manifest_entry
sonusai/utils/asr_manifest_functions/mcgill_speech.py ADDED
@@ -0,0 +1,29 @@
+from sonusai.utils.asr_manifest_functions import PathInfo
+
+
+def get_mcgill_speech_manifest_entry(entry: PathInfo, transcript_data: list[str]) -> dict:
+    from os.path import splitext
+    from os.path import basename
+    from subprocess import check_output
+
+    from sonusai import SonusAIError
+
+    name = splitext(entry.abs_path)[0]
+    duration = float(check_output(f'soxi -D {entry.abs_path}', shell=True))
+    # i.e., from MA01_02.wav, get 01_02
+    promptname = basename(name)[2:]
+    # paragraph num
+    pnum = int(promptname[0:2])
+    snum = int(promptname[3:5])
+    idx = 11 * (pnum - 1) + (snum - 1)
+    try:
+        # remove prompt-id prefix and \n suffix
+        text = transcript_data[idx][6:-1]
+    except IndexError:
+        raise SonusAIError(f'Could not find {promptname}, idx {idx} in transcript data')
+
+    return {
+        'audio_filepath': entry.audio_filepath,
+        'text': text,
+        'duration': duration,
+    }
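
Note: the index arithmetic assumes the McGill TSP transcript file lists 11 sentences per paragraph, one per line, each with a 6-character prompt-id prefix. A worked check of the mapping, using the file name from the comment above:

    # MA01_02.wav -> promptname '01_02'
    pnum = int('01')                    # paragraph 1
    snum = int('02')                    # sentence 2
    idx = 11 * (pnum - 1) + (snum - 1)  # 0-based line index
    assert idx == 1
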
sonusai/utils/audio_devices.py ADDED
@@ -0,0 +1,41 @@
+import pyaudio
+
+
+def get_input_device_index_by_name(p: pyaudio.PyAudio, name: str = None) -> int:
+    info = p.get_host_api_info_by_index(0)
+    device_count = info.get('deviceCount')
+    for i in range(0, device_count):
+        device_info = p.get_device_info_by_host_api_device_index(0, i)
+        if name is None:
+            device_name = None
+        else:
+            device_name = device_info.get('name')
+        if name == device_name and device_info.get('maxInputChannels') > 0:
+            return i
+
+    raise ValueError(f'Could not find {name}')
+
+
+def get_input_devices(p: pyaudio.PyAudio) -> list[str]:
+    names = []
+    info = p.get_host_api_info_by_index(0)
+    device_count = info.get('deviceCount')
+    for i in range(0, device_count):
+        device_info = p.get_device_info_by_host_api_device_index(0, i)
+        device_name = device_info.get('name')
+        if device_info.get('maxInputChannels') > 0:
+            names.append(device_name)
+
+    return names
+
+
+def get_default_input_device(p: pyaudio.PyAudio) -> str:
+    info = p.get_host_api_info_by_index(0)
+    device_count = info.get('deviceCount')
+    for i in range(0, device_count):
+        device_info = p.get_device_info_by_host_api_device_index(0, i)
+        device_name = device_info.get('name')
+        if device_info.get('maxInputChannels') > 0:
+            return device_name
+
+    raise ValueError('No input audio devices found')
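
Note: a short usage sketch for the new helpers; the caller owns the PyAudio instance:

    import pyaudio

    from sonusai.utils.audio_devices import get_default_input_device
    from sonusai.utils.audio_devices import get_input_devices

    p = pyaudio.PyAudio()
    try:
        print('Input devices:', get_input_devices(p))
        print('Default device:', get_default_input_device(p))
    finally:
        p.terminate()
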
sonusai/utils/calculate_input_shape.py CHANGED
@@ -13,13 +13,12 @@ def calculate_input_shape(feature: str,
     """
     from pyaaware import FeatureGenerator
 
-    # num_classes is irrelevant, set to 2
-    fg = FeatureGenerator(feature_mode=feature, num_classes=2)
+    fg = FeatureGenerator(feature_mode=feature)
 
     if flatten:
-        in_shape = [fg.stride * fg.num_bands]
+        in_shape = [fg.stride * fg.feature_parameters]
     else:
-        in_shape = [fg.stride, fg.num_bands]
+        in_shape = [fg.stride, fg.feature_parameters]
 
     if timesteps > 0:
         in_shape.insert(0, timesteps)
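
Note: the `num_bands` to `feature_parameters` rename tracks the `FeatureGenerator` attribute in pyaaware >=1.5.7 (the new minimum pinned in METADATA below). The resulting shapes, using made-up stride and parameter counts for illustration:

    # Hypothetical feature mode with stride=6 and feature_parameters=64
    stride, feature_parameters = 6, 64

    flat = [stride * feature_parameters]   # flatten=True  -> [384]
    unflat = [stride, feature_parameters]  # flatten=False -> [6, 64]
    with_time = [10] + unflat              # timesteps=10  -> [10, 6, 64]
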
sonusai/utils/create_timestamp.py ADDED
@@ -0,0 +1,5 @@
+def create_timestamp() -> str:
+    """Create a timestamp."""
+    from datetime import datetime
+
+    return datetime.now().strftime('%Y%m%d-%H%M%S')
sonusai/utils/{trim_docstring.py → docstring.py} RENAMED
@@ -28,3 +28,23 @@ def trim_docstring(docstring: str) -> str:
 
     # Return a single string
     return '\n'.join(trimmed)
+
+
+def add_commands_to_docstring(docstring: str, plugin_docstrings: list[str]) -> str:
+    """Add commands to docstring"""
+    import sonusai
+
+    lines = docstring.splitlines()
+
+    start = lines.index('The sonusai commands are:')
+    end = lines.index('', start)
+
+    commands = sonusai.commands_doc.splitlines()
+    for plugin_docstring in plugin_docstrings:
+        commands.extend(plugin_docstring.splitlines())
+    commands.sort()
+    commands = list(filter(None, commands))
+
+    lines = lines[:start + 1] + commands + lines[end:]
+
+    return '\n'.join(lines)
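
Note: `add_commands_to_docstring()` merges the built-in `sonusai.commands_doc` lines with any plugin-supplied command lines, sorted, between the marker line and the next blank line. A sketch of the expected docstring shape (the plugin command is hypothetical):

    usage = '''usage: sonusai [-h] COMMAND

    The sonusai commands are:

    For more help, see sonusai COMMAND -h.
    '''

    plugin_docstrings = ['   mycmd   A hypothetical plugin command']
    # add_commands_to_docstring(usage, plugin_docstrings) inserts the sorted,
    # non-empty command lines immediately after 'The sonusai commands are:'.
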
sonusai/utils/model_utils.py ADDED
@@ -0,0 +1,30 @@
+from typing import Any
+
+
+def import_module(name: str) -> Any:
+    """Import a Python module adding the module file's directory to the Python system path so that relative package
+    imports are found correctly.
+    """
+    import os
+    import sys
+    from importlib import import_module
+
+    from sonusai import SonusAIError
+
+    try:
+        path = os.path.dirname(name)
+        if len(path) < 1:
+            path = './'
+
+        # Add model file location to system path
+        sys.path.append(os.path.abspath(path))
+
+        try:
+            root = os.path.splitext(os.path.basename(name))[0]
+            model = import_module(root)
+        except Exception as e:
+            raise SonusAIError(f'Error: could not import model from {name}: {e}.')
+    except Exception as e:
+        raise SonusAIError(f'Error: could not find {name}: {e}.')
+
+    return model
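
Note: a usage sketch loading a user model script by file path so that imports of files sitting next to the script resolve (the path and class here are hypothetical):

    from sonusai.utils.model_utils import import_module

    # The script's directory is appended to sys.path before importing,
    # so my_model.py can import sibling modules directly.
    model_module = import_module('/path/to/my_model.py')
    model = model_module.MyModel()  # hypothetical class defined in the script
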
sonusai/utils/onnx_utils.py CHANGED
@@ -3,33 +3,15 @@ from dataclasses import dataclass
 from onnxruntime import InferenceSession
 
 
-def replace_stateful_grus(keras_model, onnx_model):
-    """Replace stateful GRUs with custom layers."""
-    import warnings
-
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore')
-        from keras.layers import GRU
-
-    stateful_gru_names = []
-    for i in range(len(keras_model.layers)):
-        layer = keras_model.layers[i]
-        if isinstance(layer, GRU):
-            if layer.stateful:
-                stateful_gru_names.append(layer.name)
-
-    for node_index in range(len(onnx_model.graph.node)):
-        node = onnx_model.graph.node[node_index]
-        replace = False
-        if node.op_type == 'GRU':
-            for i in node.input:
-                for n in stateful_gru_names:
-                    if n in i:
-                        replace = True
-        if node.name in stateful_gru_names or replace:
-            node.op_type = 'SGRU'
-
-    return onnx_model
+@dataclass(frozen=True)
+class SonusAIMetaData:
+    input_shape: list[int]
+    output_shape: list[int]
+    flattened: bool
+    timestep: bool
+    channel: bool
+    mutex: bool
+    feature: str
 
 
 def add_sonusai_metadata(model,
@@ -38,13 +20,14 @@ def add_sonusai_metadata(model,
                          has_channel: bool = False,
                          is_mutex: bool = True,
                          feature: str = ''):
-    """Add SonusAI metadata to ONNX model.
-       model         keras model
-       is_flattened  model feature data is flattened
-       has_timestep  model has timestep dimension
-       has_channel   model has channel dimension
-       is_mutex      model label output is mutually exclusive
-       feature       model feature type
+    """Add SonusAI metadata to an ONNX model.
+
+    :param model: ONNX model
+    :param is_flattened: Model feature data is flattened
+    :param has_timestep: Model has timestep dimension
+    :param has_channel: Model has channel dimension
+    :param is_mutex: Model label output is mutually exclusive
+    :param feature: Model feature type
     """
     is_flattened_flag = model.metadata_props.add()
     is_flattened_flag.key = 'is_flattened'
@@ -69,18 +52,9 @@ def add_sonusai_metadata(model,
     return model
 
 
-@dataclass(frozen=True)
-class SonusAIMetaData:
-    input_shape: list[int]
-    output_shape: list[int]
-    flattened: bool
-    timestep: bool
-    channel: bool
-    mutex: bool
-    feature: str
-
-
 def get_sonusai_metadata(model: InferenceSession) -> SonusAIMetaData:
+    """Get SonusAI metadata from an ONNX model.
+    """
     m = model.get_modelmeta().custom_metadata_map
     return SonusAIMetaData(input_shape=model.get_inputs()[0].shape,
                            output_shape=model.get_outputs()[0].shape,
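
Note: reading the metadata back from a saved model, assuming it was written with `add_sonusai_metadata()` before export (the file name is hypothetical):

    from onnxruntime import InferenceSession

    from sonusai.utils.onnx_utils import get_sonusai_metadata

    session = InferenceSession('model.onnx')  # hypothetical model file
    meta = get_sonusai_metadata(session)
    print(meta.feature, meta.input_shape, meta.flattened)
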
sonusai/utils/reshape.py CHANGED
@@ -17,14 +17,14 @@ def reshape_inputs(feature: Feature,
                    timesteps: int = 0,
                    flatten: bool = False,
                    add1ch: bool = False) -> tuple[Feature, Optional[Truth]]:
-    """Check SonusAI feature and truth data and reshape feature of size [frames, strides, bands] into
+    """Check SonusAI feature and truth data and reshape feature of size [frames, strides, feature_parameters] into
     one of several options:
 
     If timesteps > 0: (i.e., for recurrent NNs):
-      no-flatten, no-channel:   [sequences, timesteps, strides, bands]       (4-dim)
-      flatten, no-channel:      [sequences, timesteps, strides*bands]        (3-dim)
-      no-flatten, add-1channel: [sequences, timesteps, strides, bands, 1]    (5-dim)
-      flatten, add-1channel:    [sequences, timesteps, strides*bands, 1]     (4-dim)
+      no-flatten, no-channel:   [sequences, timesteps, strides, feature_parameters]       (4-dim)
+      flatten, no-channel:      [sequences, timesteps, strides*feature_parameters]        (3-dim)
+      no-flatten, add-1channel: [sequences, timesteps, strides, feature_parameters, 1]    (5-dim)
+      flatten, add-1channel:    [sequences, timesteps, strides*feature_parameters, 1]     (4-dim)
 
     If batch_size is None, then do not reshape; just calculate new input shape and return.
 
@@ -40,7 +40,7 @@ def reshape_inputs(feature: Feature,
     """
     from sonusai import SonusAIError
 
-    frames, strides, bands = feature.shape
+    frames, strides, feature_parameters = feature.shape
     if truth is not None:
         truth_frames, num_classes = truth.shape
         # Double-check correctness of inputs
@@ -50,7 +50,7 @@ def reshape_inputs(feature: Feature,
         num_classes = None
 
     if flatten:
-        feature = np.reshape(feature, (frames, strides * bands))
+        feature = np.reshape(feature, (frames, strides * feature_parameters))
 
     # Reshape for Keras/TF recurrent models that require timesteps/sequence length dimension
     if timesteps > 0:
@@ -73,14 +73,14 @@ def reshape_inputs(feature: Feature,
 
     # Reshape
     if feature.ndim == 2:  # flattened input
-        # was [frames, bands*timesteps]
-        feature = np.reshape(feature, (sequences, timesteps, strides * bands))
+        # was [frames, feature_parameters*timesteps]
+        feature = np.reshape(feature, (sequences, timesteps, strides * feature_parameters))
         if truth is not None:
             # was [frames, num_classes]
            truth = np.reshape(truth, (sequences, timesteps, num_classes))
     elif feature.ndim == 3:  # un-flattened input
-        # was [frames, bands, timesteps]
-        feature = np.reshape(feature, (sequences, timesteps, strides, bands))
+        # was [frames, feature_parameters, timesteps]
+        feature = np.reshape(feature, (sequences, timesteps, strides, feature_parameters))
        if truth is not None:
            # was [frames, num_classes]
            truth = np.reshape(truth, (sequences, timesteps, num_classes))
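
Note: the docstring's shape table can be checked with plain numpy; a minimal sketch of the two recurrent layouts described above (sizes are arbitrary):

    import numpy as np

    frames, strides, feature_parameters = 20, 6, 64
    feature = np.zeros((frames, strides, feature_parameters), dtype=np.float32)

    timesteps = 10
    sequences = frames // timesteps  # 2

    # no-flatten, no-channel: [sequences, timesteps, strides, feature_parameters]
    unflattened = feature.reshape(sequences, timesteps, strides, feature_parameters)

    # flatten, no-channel: [sequences, timesteps, strides*feature_parameters]
    flattened = feature.reshape(sequences, timesteps, strides * feature_parameters)
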
sonusai/utils/wave.py CHANGED
@@ -5,15 +5,22 @@ from sonusai.mixture.datatypes import AudioT
 def write_wav(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
     """ Write a simple, uncompressed WAV file.
 
-    To write multiple channels, use a 2D array of shape [samples, channels].
+    To write multiple channels, use a 2D array of shape [channels, samples].
     The bits per sample and PCM/float are determined by the data type.
 
     """
-    import numpy as np
     import torch
     import torchaudio
 
-    if audio.ndim == 1:
-        audio = np.reshape(audio, (1, audio.shape[0]))
+    data = torch.tensor(audio)
 
-    torchaudio.save(name, torch.tensor(audio), sample_rate)
+    if data.dim() == 1:
+        data = torch.reshape(data, (1, data.shape[0]))
+    if data.dim() != 2:
+        raise ValueError('audio must be a 1D or 2D array')
+
+    # Assuming data has more samples than channels, check if array needs to be transposed
+    if data.shape[1] < data.shape[0]:
+        data = torch.transpose(data, 0, 1)
+
+    torchaudio.save(uri=name, src=data, sample_rate=sample_rate)
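
Note: mono usage is unchanged; for multichannel data either orientation now works, since the longer axis is assumed to be samples (a sketch, assuming `write_wav` is re-exported from `sonusai.utils`):

    import numpy as np

    from sonusai.utils import write_wav

    mono = np.zeros(16000, dtype=np.float32)
    write_wav('mono.wav', mono)          # 1D input becomes [1, samples]

    stereo = np.zeros((2, 16000), dtype=np.float32)
    write_wav('stereo.wav', stereo)      # [channels, samples] saved as-is
    write_wav('stereo_t.wav', stereo.T)  # [samples, channels] is transposed
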
{sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.15.8
+Version: 0.16.0
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -15,50 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: aixplain (>=0.2.6,<0.3.0)
-Requires-Dist: ctranslate2 (==4.1.0)
 Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
-Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
 Requires-Dist: docopt (>=0.6.2,<0.7.0)
-Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
 Requires-Dist: h5py (>=3.11.0,<4.0.0)
 Requires-Dist: jiwer (>=3.0.3,<4.0.0)
-Requires-Dist: keras (>=3.1.1,<4.0.0)
-Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
 Requires-Dist: librosa (>=0.10.1,<0.11.0)
-Requires-Dist: lightning (>=2.2,<2.3)
 Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
 Requires-Dist: onnx (>=1.14.1,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
 Requires-Dist: pandas (>=2.1.1,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
-Requires-Dist: pyaaware (>=1.5.3,<2.0.0)
+Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
+Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0)
 Requires-Dist: pystoi (>=0.4.0,<0.5.0)
-Requires-Dist: python-magic (>=0.4.27,<0.5.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
 Requires-Dist: samplerate (>=0.2.1,<0.3.0)
 Requires-Dist: soundfile (>=0.12.1,<0.13.0)
 Requires-Dist: sox (>=1.4.1,<2.0.0)
-Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
-Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
-Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
 Requires-Dist: torch (>=2.2,<2.3)
 Requires-Dist: torchaudio (>=2.2,<2.3)
-Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
 Requires-Dist: tqdm (>=4.66.1,<5.0.0)
 Description-Content-Type: text/x-rst
 
-Sonus AI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
+SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
 
-Sonus AI includes functions for pre-processing training and validation data and
+SonusAI includes functions for pre-processing training and validation data and
 creating performance metrics reports for key types of Keras models:
 - recurrent, convolutional, or a combination (i.e. RCNNs)
 - binary, multiclass single-label, multiclass multi-label, and regression
 - training with data augmentations: noise mixing, pitch and time stretch, etc.
 
-Sonus AI python functions are used by:
-  - Aaware Inc. sonusai executable: Easily create train/validation data, run prediction, evaluate model performance
-  - Keras model scripts: User python scripts for keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for trainining rnn-based models like CRNN's, DSCRNN's, etc. in Keras
+SonusAI python functions are used by:
+  - Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
+  - Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.