sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. sonusai/__init__.py +20 -29
  2. sonusai/aawscd_probwrite.py +18 -18
  3. sonusai/audiofe.py +93 -80
  4. sonusai/calc_metric_spenh.py +395 -321
  5. sonusai/data/genmixdb.yml +5 -11
  6. sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
  7. sonusai/{plot.py → deprecated/plot.py} +177 -131
  8. sonusai/{tplot.py → deprecated/tplot.py} +124 -102
  9. sonusai/doc/__init__.py +1 -1
  10. sonusai/doc/doc.py +112 -177
  11. sonusai/doc.py +10 -10
  12. sonusai/genft.py +81 -91
  13. sonusai/genmetrics.py +51 -61
  14. sonusai/genmix.py +105 -115
  15. sonusai/genmixdb.py +201 -174
  16. sonusai/lsdb.py +56 -66
  17. sonusai/main.py +23 -20
  18. sonusai/metrics/__init__.py +2 -0
  19. sonusai/metrics/calc_audio_stats.py +29 -24
  20. sonusai/metrics/calc_class_weights.py +7 -7
  21. sonusai/metrics/calc_optimal_thresholds.py +5 -7
  22. sonusai/metrics/calc_pcm.py +3 -3
  23. sonusai/metrics/calc_pesq.py +10 -7
  24. sonusai/metrics/calc_phase_distance.py +3 -3
  25. sonusai/metrics/calc_sa_sdr.py +10 -8
  26. sonusai/metrics/calc_segsnr_f.py +16 -18
  27. sonusai/metrics/calc_speech.py +105 -47
  28. sonusai/metrics/calc_wer.py +35 -32
  29. sonusai/metrics/calc_wsdr.py +10 -7
  30. sonusai/metrics/class_summary.py +30 -27
  31. sonusai/metrics/confusion_matrix_summary.py +25 -22
  32. sonusai/metrics/one_hot.py +91 -57
  33. sonusai/metrics/snr_summary.py +53 -46
  34. sonusai/mixture/__init__.py +20 -14
  35. sonusai/mixture/audio.py +4 -6
  36. sonusai/mixture/augmentation.py +37 -43
  37. sonusai/mixture/class_count.py +5 -14
  38. sonusai/mixture/config.py +292 -225
  39. sonusai/mixture/constants.py +41 -30
  40. sonusai/mixture/data_io.py +155 -0
  41. sonusai/mixture/datatypes.py +111 -108
  42. sonusai/mixture/db_datatypes.py +54 -70
  43. sonusai/mixture/eq_rule_is_valid.py +6 -9
  44. sonusai/mixture/feature.py +40 -38
  45. sonusai/mixture/generation.py +522 -389
  46. sonusai/mixture/helpers.py +217 -272
  47. sonusai/mixture/log_duration_and_sizes.py +16 -13
  48. sonusai/mixture/mixdb.py +669 -477
  49. sonusai/mixture/soundfile_audio.py +12 -17
  50. sonusai/mixture/sox_audio.py +91 -112
  51. sonusai/mixture/sox_augmentation.py +8 -9
  52. sonusai/mixture/spectral_mask.py +4 -6
  53. sonusai/mixture/target_class_balancing.py +41 -36
  54. sonusai/mixture/targets.py +69 -67
  55. sonusai/mixture/tokenized_shell_vars.py +23 -23
  56. sonusai/mixture/torchaudio_audio.py +14 -15
  57. sonusai/mixture/torchaudio_augmentation.py +23 -27
  58. sonusai/mixture/truth.py +48 -26
  59. sonusai/mixture/truth_functions/__init__.py +26 -0
  60. sonusai/mixture/truth_functions/crm.py +56 -38
  61. sonusai/mixture/truth_functions/datatypes.py +37 -0
  62. sonusai/mixture/truth_functions/energy.py +85 -59
  63. sonusai/mixture/truth_functions/file.py +30 -30
  64. sonusai/mixture/truth_functions/phoneme.py +14 -7
  65. sonusai/mixture/truth_functions/sed.py +71 -45
  66. sonusai/mixture/truth_functions/target.py +69 -106
  67. sonusai/mkwav.py +58 -101
  68. sonusai/onnx_predict.py +46 -43
  69. sonusai/queries/__init__.py +3 -1
  70. sonusai/queries/queries.py +100 -59
  71. sonusai/speech/__init__.py +2 -0
  72. sonusai/speech/l2arctic.py +24 -23
  73. sonusai/speech/librispeech.py +16 -17
  74. sonusai/speech/mcgill.py +22 -21
  75. sonusai/speech/textgrid.py +32 -25
  76. sonusai/speech/timit.py +45 -42
  77. sonusai/speech/vctk.py +14 -13
  78. sonusai/speech/voxceleb.py +26 -20
  79. sonusai/summarize_metric_spenh.py +11 -10
  80. sonusai/utils/__init__.py +4 -3
  81. sonusai/utils/asl_p56.py +1 -1
  82. sonusai/utils/asr.py +37 -17
  83. sonusai/utils/asr_functions/__init__.py +2 -0
  84. sonusai/utils/asr_functions/aaware_whisper.py +18 -12
  85. sonusai/utils/audio_devices.py +12 -12
  86. sonusai/utils/braced_glob.py +6 -8
  87. sonusai/utils/calculate_input_shape.py +1 -4
  88. sonusai/utils/compress.py +2 -2
  89. sonusai/utils/convert_string_to_number.py +1 -3
  90. sonusai/utils/create_timestamp.py +1 -1
  91. sonusai/utils/create_ts_name.py +2 -2
  92. sonusai/utils/dataclass_from_dict.py +1 -1
  93. sonusai/utils/docstring.py +6 -6
  94. sonusai/utils/energy_f.py +9 -7
  95. sonusai/utils/engineering_number.py +56 -54
  96. sonusai/utils/get_label_names.py +8 -10
  97. sonusai/utils/human_readable_size.py +2 -2
  98. sonusai/utils/model_utils.py +3 -5
  99. sonusai/utils/numeric_conversion.py +2 -4
  100. sonusai/utils/onnx_utils.py +43 -32
  101. sonusai/utils/parallel.py +41 -30
  102. sonusai/utils/print_mixture_details.py +25 -22
  103. sonusai/utils/ranges.py +12 -12
  104. sonusai/utils/read_predict_data.py +11 -9
  105. sonusai/utils/reshape.py +19 -26
  106. sonusai/utils/seconds_to_hms.py +1 -1
  107. sonusai/utils/stacked_complex.py +8 -16
  108. sonusai/utils/stratified_shuffle_split.py +29 -27
  109. sonusai/utils/write_audio.py +2 -2
  110. sonusai/utils/yes_or_no.py +3 -3
  111. sonusai/vars.py +14 -14
  112. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
  113. sonusai-0.19.6.dist-info/RECORD +125 -0
  114. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
  115. sonusai/mixture/truth_functions/data.py +0 -58
  116. sonusai/utils/read_mixture_data.py +0 -14
  117. sonusai-0.18.9.dist-info/RECORD +0 -125
  118. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
sonusai/utils/ranges.py CHANGED
@@ -2,18 +2,18 @@ def expand_range(s: str, sort: bool = True) -> list[int]:
2
2
  """Returns a list of integers from a string input representing a range."""
3
3
  import re
4
4
 
5
- clean_s = s.replace(':', '-')
6
- clean_s = clean_s.replace(';', ',')
7
- clean_s = re.sub(r' +', ',', clean_s)
8
- clean_s = re.sub(r',+', ',', clean_s)
5
+ clean_s = s.replace(":", "-")
6
+ clean_s = clean_s.replace(";", ",")
7
+ clean_s = re.sub(r" +", ",", clean_s)
8
+ clean_s = re.sub(r",+", ",", clean_s)
9
9
 
10
10
  r: list[int] = []
11
- for i in clean_s.split(','):
12
- if '-' not in i:
11
+ for i in clean_s.split(","):
12
+ if "-" not in i:
13
13
  r.append(int(i))
14
14
  else:
15
- l, h = map(int, i.split('-'))
16
- r += range(l, h + 1)
15
+ lo, hi = map(int, i.split("-"))
16
+ r += range(lo, hi + 1)
17
17
 
18
18
  if sort:
19
19
  r = sorted(r)
@@ -23,12 +23,12 @@ def expand_range(s: str, sort: bool = True) -> list[int]:
23
23
 
24
24
  def consolidate_range(r: list[int]) -> str:
25
25
  """Returns a string representing a range from an input list of integers."""
26
- from typing import Generator
26
+ from collections.abc import Generator
27
27
 
28
28
  def ranges(i: list[int]) -> Generator[tuple[int, int], None, None]:
29
29
  import itertools
30
30
 
31
- for a, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
31
+ for _, b in itertools.groupby(enumerate(i), lambda pair: pair[1] - pair[0]):
32
32
  b_list = list(b)
33
33
  yield b_list[0][1], b_list[-1][1]
34
34
 
@@ -37,7 +37,7 @@ def consolidate_range(r: list[int]) -> str:
37
37
  for val in ls:
38
38
  entry = str(val[0])
39
39
  if val[0] != val[1]:
40
- entry += f'-{val[1]}'
40
+ entry += f"-{val[1]}"
41
41
  result.append(entry)
42
42
 
43
- return ', '.join(result)
43
+ return ", ".join(result)
@@ -7,13 +7,12 @@ def read_predict_data(filename: str) -> Predict:
7
7
  """Read predict data from given HDF5 file and return it."""
8
8
  import h5py
9
9
 
10
- from sonusai import SonusAIError
11
10
  from sonusai import logger
12
11
 
13
- logger.debug(f'Reading prediction data from {filename}')
14
- with h5py.File(filename, 'r') as f:
12
+ logger.debug(f"Reading prediction data from {filename}")
13
+ with h5py.File(filename, "r") as f:
15
14
  # prediction data is either [frames, num_classes], or [frames, timesteps, num_classes]
16
- predict = np.array(f['predict'])
15
+ predict = np.array(f["predict"])
17
16
 
18
17
  if predict.ndim == 2:
19
18
  return predict
@@ -21,10 +20,13 @@ def read_predict_data(filename: str) -> Predict:
21
20
  if predict.ndim == 3:
22
21
  frames, timesteps, num_classes = predict.shape
23
22
 
24
- logger.debug(f'Reshaping prediction data in {filename} 'f''
25
- f'from [{frames}, {timesteps}, {num_classes}] '
26
- f'to [{frames * timesteps}, {num_classes}]')
27
- predict = np.reshape(predict, [frames * timesteps, num_classes], order='F')
23
+ logger.debug(
24
+ f"Reshaping prediction data in {filename} "
25
+ f""
26
+ f"from [{frames}, {timesteps}, {num_classes}] "
27
+ f"to [{frames * timesteps}, {num_classes}]"
28
+ )
29
+ predict = np.reshape(predict, [frames * timesteps, num_classes], order="F")
28
30
  return predict
29
31
 
30
- raise SonusAIError(f'Invalid prediction data dimensions in {filename}')
32
+ raise RuntimeError(f"Invalid prediction data dimensions in {filename}")
sonusai/utils/reshape.py CHANGED
@@ -1,5 +1,3 @@
1
- from typing import Optional
2
-
3
1
  import numpy as np
4
2
 
5
3
  from sonusai.mixture.datatypes import Feature
@@ -11,12 +9,14 @@ def get_input_shape(feature: Feature) -> tuple[int, ...]:
11
9
  return feature.shape[1:]
12
10
 
13
11
 
14
- def reshape_inputs(feature: Feature,
15
- batch_size: int,
16
- truth: Optional[Truth] = None,
17
- timesteps: int = 0,
18
- flatten: bool = False,
19
- add1ch: bool = False) -> tuple[Feature, Optional[Truth]]:
12
+ def reshape_inputs(
13
+ feature: Feature,
14
+ batch_size: int,
15
+ truth: Truth | None = None,
16
+ timesteps: int = 0,
17
+ flatten: bool = False,
18
+ add1ch: bool = False,
19
+ ) -> tuple[Feature, Truth | None]:
20
20
  """Check SonusAI feature and truth data and reshape feature of size [frames, strides, feature_parameters] into
21
21
  one of several options:
22
22
 
@@ -38,16 +38,14 @@ def reshape_inputs(feature: Feature,
38
38
  feature reshaped feature
39
39
  truth reshaped truth
40
40
  """
41
- from sonusai import SonusAIError
42
-
43
41
  frames, strides, feature_parameters = feature.shape
44
42
  if truth is not None:
45
43
  truth_frames, num_classes = truth.shape
46
44
  # Double-check correctness of inputs
47
45
  if frames != truth_frames:
48
- raise SonusAIError('Frames in feature and truth do not match')
46
+ raise ValueError("Frames in feature and truth do not match")
49
47
  else:
50
- num_classes = None
48
+ num_classes = 0
51
49
 
52
50
  if flatten:
53
51
  feature = np.reshape(feature, (frames, strides * feature_parameters))
@@ -64,12 +62,12 @@ def reshape_inputs(feature: Feature,
64
62
  fr2drop = frames_rem + bf_rem
65
63
  if fr2drop:
66
64
  if feature.ndim == 2:
67
- feature = feature[0:-fr2drop, ] # flattened input
65
+ feature = feature[0:-fr2drop,] # flattened input
68
66
  elif feature.ndim == 3:
69
- feature = feature[0:-fr2drop, ] # un-flattened input
67
+ feature = feature[0:-fr2drop,] # un-flattened input
70
68
 
71
69
  if truth is not None:
72
- truth = truth[0:-fr2drop, ]
70
+ truth = truth[0:-fr2drop,]
73
71
 
74
72
  # Reshape
75
73
  if feature.ndim == 2: # flattened input
@@ -88,9 +86,9 @@ def reshape_inputs(feature: Feature,
88
86
  # Drop frames if remainder exists (not fitting into a multiple of new number of sequences)
89
87
  fr2drop = feature.shape[0] % batch_size
90
88
  if fr2drop > 0:
91
- feature = feature[0:-fr2drop, ]
89
+ feature = feature[0:-fr2drop,]
92
90
  if truth is not None:
93
- truth = truth[0:-fr2drop, ]
91
+ truth = truth[0:-fr2drop,]
94
92
 
95
93
  # Add channel dimension if required for input to model (i.e. for cnn type input)
96
94
  if add1ch:
@@ -119,25 +117,20 @@ def get_num_classes_from_predict(predict: Predict, timesteps: int = 0) -> int:
119
117
  return dims[1]
120
118
 
121
119
 
122
- def reshape_outputs(predict: Predict,
123
- truth: Optional[Truth] = None,
124
- timesteps: int = 0) -> tuple[Predict, Optional[Truth]]:
120
+ def reshape_outputs(predict: Predict, truth: Truth | None = None, timesteps: int = 0) -> tuple[Predict, Truth | None]:
125
121
  """Reshape model output data.
126
122
 
127
123
  truth and predict can be either [frames, num_classes], or [frames, timesteps, num_classes]
128
124
  In binary case, num_classes dim may not exist; detect this and set num_classes to 1.
129
125
  """
130
- from sonusai import SonusAIError
131
-
132
- if truth is not None:
133
- if predict.shape != truth.shape:
134
- raise SonusAIError('predict and truth shapes do not match')
126
+ if truth is not None and predict.shape != truth.shape:
127
+ raise ValueError("predict and truth shapes do not match")
135
128
 
136
129
  ndim = predict.ndim
137
130
  shape = predict.shape
138
131
 
139
132
  if not (0 < ndim <= 3):
140
- raise SonusAIError(f'do not know how to reshape data with {ndim} dimensions')
133
+ raise ValueError(f"do not know how to reshape data with {ndim} dimensions")
141
134
 
142
135
  if ndim == 3 or (ndim == 2 and timesteps > 0):
143
136
  if ndim == 2:
@@ -4,4 +4,4 @@ def seconds_to_hms(seconds: float) -> str:
4
4
  s = seconds - h * 3600
5
5
  m = int(s / 60)
6
6
  s = int(seconds - h * 3600 - m * 60)
7
- return f'{h:d}:{m:02d}:{s:02d} (H:MM:SS)'
7
+ return f"{h:d}:{m:02d}:{s:02d} (H:MM:SS)"
@@ -12,14 +12,12 @@ def stack_complex(unstacked: np.ndarray) -> np.ndarray:
12
12
  :return: A stacked array
13
13
  :raises TypeError:
14
14
  """
15
- from sonusai import SonusAIError
16
-
17
15
  if not unstacked.ndim > 1:
18
- raise SonusAIError('unstacked must have more than 1 dimension')
16
+ raise ValueError("unstacked must have more than 1 dimension")
19
17
 
20
18
  shape = list(unstacked.shape)
21
19
  shape[-1] = shape[-1] * 2
22
- stacked = np.empty(shape, dtype=np.complex64)
20
+ stacked = np.empty(shape, dtype=np.float32)
23
21
  half = unstacked.shape[-1]
24
22
  stacked[..., :half] = np.real(unstacked)
25
23
  stacked[..., half:] = np.imag(unstacked)
@@ -35,13 +33,11 @@ def unstack_complex(stacked: np.ndarray) -> np.ndarray:
35
33
  :return: An unstacked complex array
36
34
  :raises TypeError:
37
35
  """
38
- from sonusai import SonusAIError
39
-
40
36
  if not stacked.ndim > 1:
41
- raise SonusAIError('stacked must have more than 1 dimension')
37
+ raise ValueError("stacked must have more than 1 dimension")
42
38
 
43
39
  if stacked.shape[-1] % 2 != 0:
44
- raise SonusAIError('last dimension of stacked must be a multiple of 2')
40
+ raise ValueError("last dimension of stacked must be a multiple of 2")
45
41
 
46
42
  half = stacked.shape[-1] // 2
47
43
  unstacked = 1j * stacked[..., half:]
@@ -58,13 +54,11 @@ def stacked_complex_real(stacked: np.ndarray) -> np.ndarray:
58
54
  :return: The real elements
59
55
  :raises TypeError:
60
56
  """
61
- from sonusai import SonusAIError
62
-
63
57
  if not stacked.ndim > 1:
64
- raise SonusAIError('stacked must have more than 1 dimension')
58
+ raise ValueError("stacked must have more than 1 dimension")
65
59
 
66
60
  if stacked.shape[-1] % 2 != 0:
67
- raise SonusAIError('last dimension of stacked must be a multiple of 2')
61
+ raise ValueError("last dimension of stacked must be a multiple of 2")
68
62
 
69
63
  half = stacked.shape[-1] // 2
70
64
  return stacked[..., :half]
@@ -78,13 +72,11 @@ def stacked_complex_imag(stacked: np.ndarray) -> np.ndarray:
78
72
  :return: The imaginary elements
79
73
  :raises TypeError:
80
74
  """
81
- from sonusai import SonusAIError
82
-
83
75
  if not stacked.ndim > 1:
84
- raise SonusAIError('stacked must have more than 1 dimension')
76
+ raise ValueError("stacked must have more than 1 dimension")
85
77
 
86
78
  if stacked.shape[-1] % 2 != 0:
87
- raise SonusAIError('last dimension of stacked must be a multiple of 2')
79
+ raise ValueError("last dimension of stacked must be a multiple of 2")
88
80
 
89
81
  half = stacked.shape[-1] // 2
90
82
  return stacked[..., half:]
@@ -1,14 +1,14 @@
1
- from typing import Optional
2
-
3
1
  import numpy as np
4
2
 
5
3
  from sonusai.mixture import MixtureDatabase
6
4
 
7
5
 
8
- def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
9
- vsplit: float = 0.2,
10
- nsplit: int = 0,
11
- rnd_seed: Optional[int] = 0) -> tuple[list[int], list[int], np.ndarray, np.ndarray]:
6
+ def stratified_shuffle_split_mixid(
7
+ mixdb: MixtureDatabase,
8
+ vsplit: float = 0.2,
9
+ nsplit: int = 0,
10
+ rnd_seed: int | None = 0,
11
+ ) -> tuple[list[int], list[int], np.ndarray, np.ndarray]:
12
12
  """
13
13
  Create a training and test/validation list of mixture IDs from all mixtures in a mixture database.
14
14
  The test/validation split is specified by vsplit (0.0 to 1.0), default 0.2.
@@ -35,20 +35,18 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
35
35
  import random
36
36
  from copy import deepcopy
37
37
 
38
- from sonusai import SonusAIError
39
38
  from sonusai import logger
40
39
  from sonusai.mixture import get_class_count_from_mixids
41
- from sonusai.mixture import get_truth_indices_for_target
42
40
 
43
41
  if vsplit < 0 or vsplit > 1:
44
- raise SonusAIError('vsplit must be between 0 and 1')
42
+ raise ValueError("vsplit must be between 0 and 1")
45
43
 
46
44
  a_class_mixid: dict[int, list[int]] = {i + 1: [] for i in range(mixdb.num_classes)}
47
45
  for mixid, mixture in enumerate(mixdb.mixtures):
48
46
  class_count = get_class_count_from_mixids(mixdb, mixid)
49
- if any(class_count) or mixdb.truth_mutex == 0:
50
- for truth_index in get_truth_indices_for_target(mixdb.target_files[mixture.targets[0].file_id]):
51
- a_class_mixid[truth_index].append(mixid)
47
+ if any(class_count):
48
+ for class_index in mixdb.target_files[mixture.targets[0].file_id].class_indices:
49
+ a_class_mixid[class_index].append(mixid)
52
50
  else:
53
51
  # no counts and mutex mode means this is all 'other' class
54
52
  a_class_mixid[mixdb.num_classes].append(mixid)
@@ -80,11 +78,11 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
80
78
  # randomize order
81
79
  random.shuffle(indices)
82
80
 
83
- t_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[0:t_num_mixid[ci]]]
84
- v_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[t_num_mixid[ci]:]]
81
+ t_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[0 : t_num_mixid[ci]]]
82
+ v_class_mixid[ci] = [a_class_mixid[ci + 1][ii] for ii in indices[t_num_mixid[ci] :]]
85
83
 
86
84
  if np.any(~(t_num_mixid > 0)):
87
- logger.warning(f'Some classes have zero coverage: {np.where(~(t_num_mixid > 0))[0]}')
85
+ logger.warning(f"Some classes have zero coverage: {np.where(~(t_num_mixid > 0))[0]}")
88
86
 
89
87
  # Stratify over non-zero classes
90
88
  nz_indices = np.where(t_num_mixid > 0)[0]
@@ -97,8 +95,10 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
97
95
  # 2nd stage stratify by class_count/min(class_count-n3) n2 times
98
96
  n2 = int(max(min_class - n0 - n3, 0))
99
97
 
100
- logger.info(f'Stratifying training, x1 cnt {n0}: x(class_count/{n2}): x1 cnt {n3} x1, '
101
- f'for {len(nz_indices)} populated classes')
98
+ logger.info(
99
+ f"Stratifying training, x1 cnt {n0}: x(class_count/{n2}): x1 cnt {n3} x1, "
100
+ f"for {len(nz_indices)} populated classes"
101
+ )
102
102
 
103
103
  # initialize source list
104
104
  tt = deepcopy(t_class_mixid)
@@ -116,13 +116,13 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
116
116
  # which will leave approx n3 remaining
117
117
  if n2 > 0:
118
118
  # should always be non-zero
119
- min_class = np.min(t_num_mixid2 - n3)
119
+ min_class = int(np.min(t_num_mixid2 - n3))
120
120
  class_count = np.floor((t_num_mixid2 - n3) / min_class)
121
121
  # class_count = np.maximum(np.floor((t_num_mixid2 - n3) / n2),0) # Counts per class
122
122
  for _ in range(min_class):
123
123
  for ci in range(mixdb.num_classes):
124
124
  if class_count[ci] > 0:
125
- for cc in range(int(class_count[ci])):
125
+ for _ in range(int(class_count[ci])):
126
126
  # append first
127
127
  t_mixid.append(tt[ci][0])
128
128
  del tt[ci][0]
@@ -133,10 +133,10 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
133
133
  t_mixid = _extract_remaining_mixids(mixdb, t_mixid, t_num_mixid2, tt)
134
134
 
135
135
  if len(t_mixid) != sum(t_num_mixid):
136
- logger.warning('Final stratified training list length does not match starting list length.')
136
+ logger.warning("Final stratified training list length does not match starting list length.")
137
137
 
138
138
  if any(t_num_mixid2) or any(tt):
139
- logger.warning('Remaining training mixid list not empty.')
139
+ logger.warning("Remaining training mixid list not empty.")
140
140
 
141
141
  # Now stratify the validation list, which is probably not as important, so use simple method
142
142
  # initialize source list
@@ -145,18 +145,20 @@ def stratified_shuffle_split_mixid(mixdb: MixtureDatabase,
145
145
  v_mixid = _extract_remaining_mixids(mixdb, [], v_num_mixid2, vv)
146
146
 
147
147
  if len(v_mixid) != sum(v_num_mixid):
148
- logger.warning('Final stratified validation list length does not match starting lists length.')
148
+ logger.warning("Final stratified validation list length does not match starting lists length.")
149
149
 
150
150
  if any(v_num_mixid2) or any(vv):
151
- logger.warning('Remaining validation mixid list not empty.')
151
+ logger.warning("Remaining validation mixid list not empty.")
152
152
 
153
153
  return t_mixid, v_mixid, t_num_mixid, v_num_mixid
154
154
 
155
155
 
156
- def _extract_remaining_mixids(mixdb: MixtureDatabase,
157
- mixid: list[int],
158
- num_mixid: np.ndarray,
159
- class_mixid: list[list[int]]) -> list[int]:
156
+ def _extract_remaining_mixids(
157
+ mixdb: MixtureDatabase,
158
+ mixid: list[int],
159
+ num_mixid: np.ndarray,
160
+ class_mixid: list[list[int]],
161
+ ) -> list[int]:
160
162
  for _ in range(max(num_mixid)):
161
163
  for ci in range(mixdb.num_classes):
162
164
  if num_mixid[ci] > 0:
@@ -3,7 +3,7 @@ from sonusai.mixture.datatypes import AudioT
3
3
 
4
4
 
5
5
  def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
6
- """ Write an audio file.
6
+ """Write an audio file.
7
7
 
8
8
  To write multiple channels, use a 2D array of shape [channels, samples].
9
9
  The bits per sample and PCM/float are determined by the data type.
@@ -17,7 +17,7 @@ def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> Non
17
17
  if data.dim() == 1:
18
18
  data = torch.reshape(data, (1, data.shape[0]))
19
19
  if data.dim() != 2:
20
- raise ValueError(f'audio must be a 1D or 2D array')
20
+ raise ValueError("audio must be a 1D or 2D array")
21
21
 
22
22
  # Assuming data has more samples than channels, check if array needs to be transposed
23
23
  if data.shape[1] < data.shape[0]:
@@ -1,8 +1,8 @@
1
1
  def yes_or_no(question: str) -> bool:
2
2
  """Wait for yes or no input"""
3
3
  while True:
4
- reply = str(input(question + ' (y/n)?: ')).lower().strip()
5
- if reply[:1] == 'y':
4
+ reply = str(input(question + " (y/n)?: ")).lower().strip()
5
+ if reply[:1] == "y":
6
6
  return True
7
- if reply[:1] == 'n':
7
+ if reply[:1] == "n":
8
8
  return False
sonusai/vars.py CHANGED
@@ -23,18 +23,18 @@ def main() -> None:
23
23
 
24
24
  from sonusai.mixture import DEFAULT_NOISE
25
25
 
26
- print('Custom SonusAI variables:')
27
- print('')
28
- print(f'${{default_noise}}: {DEFAULT_NOISE}')
29
- print('')
30
- print('SonusAI recognized environment variables:')
31
- print('')
32
- print(f'DEEPGRAM_API_KEY {getenv("DEEPGRAM_API_KEY")}')
33
- print(f'GOOGLE_SPEECH_API_KEY {getenv("GOOGLE_SPEECH_API_KEY")}')
34
- print('')
35
- items = ['DEEPGRAM_API_KEY', 'GOOGLE_SPEECH_API_KEY']
36
- items += [item for item in environ.keys() if item.upper().startswith("AIXP_WHISPER_")]
37
-
38
-
39
- if __name__ == '__main__':
26
+ print("Custom SonusAI variables:")
27
+ print("")
28
+ print(f"${{default_noise}}: {DEFAULT_NOISE}")
29
+ print("")
30
+ print("SonusAI recognized environment variables:")
31
+ print("")
32
+ print(f"DEEPGRAM_API_KEY {getenv('DEEPGRAM_API_KEY')}")
33
+ print(f"GOOGLE_SPEECH_API_KEY {getenv('GOOGLE_SPEECH_API_KEY')}")
34
+ print("")
35
+ items = ["DEEPGRAM_API_KEY", "GOOGLE_SPEECH_API_KEY"]
36
+ items += [item for item in environ if item.upper().startswith("AIXP_WHISPER_")]
37
+
38
+
39
+ if __name__ == "__main__":
40
40
  main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.18.9
3
+ Version: 0.19.6
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -8,39 +8,38 @@ Author: Chris Eddington
8
8
  Author-email: chris@aaware.com
9
9
  Maintainer: Chris Eddington
10
10
  Maintainer-email: chris@aaware.com
11
- Requires-Python: >=3.9,<3.12
11
+ Requires-Python: >=3.11,<3.12
12
12
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
13
13
  Classifier: Programming Language :: Python :: 3
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
14
  Classifier: Programming Language :: Python :: 3.11
17
- Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
18
- Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
15
+ Requires-Dist: dataclasses-json (>=0.6.7,<0.7.0)
19
16
  Requires-Dist: docopt (>=0.6.2,<0.7.0)
20
- Requires-Dist: h5py (>=3.11.0,<4.0.0)
21
- Requires-Dist: jiwer (>=3.0.3,<4.0.0)
22
- Requires-Dist: librosa (>=0.10.1,<0.11.0)
23
- Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
17
+ Requires-Dist: h5py (>=3.12.1,<4.0.0)
18
+ Requires-Dist: jiwer (>=3.0.4,<4.0.0)
19
+ Requires-Dist: librosa (>=0.10.2.post1,<0.11.0)
20
+ Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
24
21
  Requires-Dist: mgzip (>=0.2.1,<0.3.0)
25
- Requires-Dist: numpy (>=1.26.4,<2.0.0)
26
- Requires-Dist: onnx (>=1.14.1,<2.0.0)
27
- Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
28
- Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
29
- Requires-Dist: pandas (>=2.1.1,<3.0.0)
22
+ Requires-Dist: numpy (>=1,<2)
23
+ Requires-Dist: onnx (>=1.17.0,<2.0.0)
24
+ Requires-Dist: onnxruntime (>=1.19.2,<2.0.0)
25
+ Requires-Dist: paho-mqtt (>=2.1.0,<3.0.0)
26
+ Requires-Dist: pandas (>=2.2.3,<3.0.0)
30
27
  Requires-Dist: pesq (>=0.0.4,<0.0.5)
31
28
  Requires-Dist: praatio (>=6.2.0,<7.0.0)
32
- Requires-Dist: psutil (>=5,<6)
33
- Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
29
+ Requires-Dist: psutil (>=6.0.0,<7.0.0)
30
+ Requires-Dist: pyaaware (>=1.5.18,<2.0.0)
34
31
  Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
35
32
  Requires-Dist: pydub (>=0.25.1,<0.26.0)
36
- Requires-Dist: pystoi (>=0.4.0,<0.5.0)
37
- Requires-Dist: requests (>=2.31.0,<3.0.0)
33
+ Requires-Dist: pystoi (>=0.4.1,<0.5.0)
34
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
35
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
36
+ Requires-Dist: rich (>=13.9.4,<14.0.0)
38
37
  Requires-Dist: samplerate (>=0.2.1,<0.3.0)
39
38
  Requires-Dist: soundfile (>=0.12.1,<0.13.0)
40
- Requires-Dist: sox (>=1.4.1,<2.0.0)
39
+ Requires-Dist: sox (>=1.5.0,<2.0.0)
41
40
  Requires-Dist: torch (>=2.2,<2.3)
42
41
  Requires-Dist: torchaudio (>=2.2,<2.3)
43
- Requires-Dist: tqdm (>=4.66.1,<5.0.0)
42
+ Requires-Dist: tqdm (>=4.66.5,<5.0.0)
44
43
  Description-Content-Type: text/x-rst
45
44
 
46
45
  SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI