sonusai-0.20.3-py3-none-any.whl → sonusai-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. sonusai/__init__.py +16 -3
  2. sonusai/audiofe.py +241 -77
  3. sonusai/calc_metric_spenh.py +71 -73
  4. sonusai/config/__init__.py +3 -0
  5. sonusai/config/config.py +61 -0
  6. sonusai/config/config.yml +20 -0
  7. sonusai/config/constants.py +8 -0
  8. sonusai/constants.py +11 -0
  9. sonusai/data/genmixdb.yml +21 -36
  10. sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
  11. sonusai/deprecated/plot.py +4 -5
  12. sonusai/doc/doc.py +4 -4
  13. sonusai/doc.py +11 -4
  14. sonusai/genft.py +43 -45
  15. sonusai/genmetrics.py +25 -19
  16. sonusai/genmix.py +54 -82
  17. sonusai/genmixdb.py +88 -264
  18. sonusai/ir_metric.py +30 -34
  19. sonusai/lsdb.py +41 -48
  20. sonusai/main.py +15 -22
  21. sonusai/metrics/calc_audio_stats.py +4 -293
  22. sonusai/metrics/calc_class_weights.py +4 -4
  23. sonusai/metrics/calc_optimal_thresholds.py +8 -5
  24. sonusai/metrics/calc_pesq.py +2 -2
  25. sonusai/metrics/calc_segsnr_f.py +4 -4
  26. sonusai/metrics/calc_speech.py +25 -13
  27. sonusai/metrics/class_summary.py +7 -7
  28. sonusai/metrics/confusion_matrix_summary.py +5 -5
  29. sonusai/metrics/one_hot.py +4 -4
  30. sonusai/metrics/snr_summary.py +7 -7
  31. sonusai/metrics_summary.py +38 -45
  32. sonusai/mixture/__init__.py +4 -104
  33. sonusai/mixture/audio.py +10 -39
  34. sonusai/mixture/class_balancing.py +103 -0
  35. sonusai/mixture/config.py +251 -271
  36. sonusai/mixture/constants.py +35 -39
  37. sonusai/mixture/data_io.py +25 -36
  38. sonusai/mixture/db_datatypes.py +58 -22
  39. sonusai/mixture/effects.py +386 -0
  40. sonusai/mixture/feature.py +7 -11
  41. sonusai/mixture/generation.py +478 -628
  42. sonusai/mixture/helpers.py +82 -184
  43. sonusai/mixture/ir_delay.py +3 -4
  44. sonusai/mixture/ir_effects.py +77 -0
  45. sonusai/mixture/log_duration_and_sizes.py +6 -12
  46. sonusai/mixture/mixdb.py +910 -729
  47. sonusai/mixture/pad_audio.py +35 -0
  48. sonusai/mixture/resample.py +7 -0
  49. sonusai/mixture/sox_effects.py +195 -0
  50. sonusai/mixture/sox_help.py +650 -0
  51. sonusai/mixture/spectral_mask.py +2 -2
  52. sonusai/mixture/truth.py +17 -15
  53. sonusai/mixture/truth_functions/crm.py +12 -12
  54. sonusai/mixture/truth_functions/energy.py +22 -22
  55. sonusai/mixture/truth_functions/file.py +5 -5
  56. sonusai/mixture/truth_functions/metadata.py +4 -4
  57. sonusai/mixture/truth_functions/metrics.py +4 -4
  58. sonusai/mixture/truth_functions/phoneme.py +3 -3
  59. sonusai/mixture/truth_functions/sed.py +11 -13
  60. sonusai/mixture/truth_functions/target.py +10 -10
  61. sonusai/mkwav.py +26 -29
  62. sonusai/onnx_predict.py +240 -88
  63. sonusai/queries/__init__.py +2 -2
  64. sonusai/queries/queries.py +38 -34
  65. sonusai/speech/librispeech.py +1 -1
  66. sonusai/speech/mcgill.py +1 -1
  67. sonusai/speech/timit.py +2 -2
  68. sonusai/summarize_metric_spenh.py +10 -17
  69. sonusai/utils/__init__.py +7 -1
  70. sonusai/utils/asl_p56.py +2 -2
  71. sonusai/utils/asr.py +2 -2
  72. sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  73. sonusai/utils/choice.py +31 -0
  74. sonusai/utils/compress.py +1 -1
  75. sonusai/utils/dataclass_from_dict.py +19 -1
  76. sonusai/utils/energy_f.py +3 -3
  77. sonusai/utils/evaluate_random_rule.py +15 -0
  78. sonusai/utils/keyboard_interrupt.py +12 -0
  79. sonusai/utils/onnx_utils.py +3 -17
  80. sonusai/utils/print_mixture_details.py +21 -19
  81. sonusai/utils/{temp_seed.py → rand.py} +3 -3
  82. sonusai/utils/read_predict_data.py +2 -2
  83. sonusai/utils/reshape.py +3 -3
  84. sonusai/utils/stratified_shuffle_split.py +3 -3
  85. sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
  86. sonusai/utils/write_audio.py +2 -2
  87. sonusai/vars.py +11 -4
  88. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
  89. sonusai-1.0.2.dist-info/RECORD +138 -0
  90. sonusai/mixture/augmentation.py +0 -444
  91. sonusai/mixture/class_count.py +0 -15
  92. sonusai/mixture/eq_rule_is_valid.py +0 -45
  93. sonusai/mixture/target_class_balancing.py +0 -107
  94. sonusai/mixture/targets.py +0 -175
  95. sonusai-0.20.3.dist-info/RECORD +0 -128
  96. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
  97. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,23 @@
1
1
  import numpy as np
2
2
 
3
- from sonusai.mixture.constants import SAMPLE_RATE
4
- from sonusai.mixture.datatypes import SpeechMetrics
5
-
3
+ from ..constants import SAMPLE_RATE
4
+ from ..datatypes import SpeechMetrics
6
5
  from .calc_pesq import calc_pesq
7
6
 
8
7
 
9
- def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE) -> SpeechMetrics:
10
- """Calculate speech metrics pesq, c_sig, c_bak, and c_ovl.
8
+ def calc_speech(
9
+ hypothesis: np.ndarray,
10
+ reference: np.ndarray,
11
+ pesq: float | None = None,
12
+ sample_rate: int = SAMPLE_RATE,
13
+ ) -> SpeechMetrics:
14
+ """Calculate speech metrics c_sig, c_bak, and c_ovl.
11
15
 
12
16
  These are all related and thus included in one function. Reference: matlab script "compute_metrics.m".
13
17
 
14
18
  :param hypothesis: estimated audio
15
19
  :param reference: reference audio
20
+ :param pesq: pesq
16
21
  :param sample_rate: sample rate of audio
17
22
  :return: SpeechMetrics named tuple
18
23
  """
@@ -36,18 +41,21 @@ def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int
36
41
  seg_snr = np.mean(segsnr_dist)
37
42
 
38
43
  # PESQ
39
- _pesq = calc_pesq(hypothesis=hypothesis, reference=reference, sample_rate=sample_rate)
44
+ if pesq is None:
45
+ pesq = calc_pesq(hypothesis=hypothesis, reference=reference, sample_rate=sample_rate)
40
46
 
41
47
  # Now compute the composite measures
42
- csig = float(np.clip(3.093 - 1.029 * llr_mean + 0.603 * _pesq - 0.009 * wss_dist, 1, 5))
43
- cbak = float(np.clip(1.634 + 0.478 * _pesq - 0.007 * wss_dist + 0.063 * seg_snr, 1, 5))
44
- covl = float(np.clip(1.594 + 0.805 * _pesq - 0.512 * llr_mean - 0.007 * wss_dist, 1, 5))
48
+ csig = float(np.clip(3.093 - 1.029 * llr_mean + 0.603 * pesq - 0.009 * wss_dist, 1, 5))
49
+ cbak = float(np.clip(1.634 + 0.478 * pesq - 0.007 * wss_dist + 0.063 * seg_snr, 1, 5))
50
+ covl = float(np.clip(1.594 + 0.805 * pesq - 0.512 * llr_mean - 0.007 * wss_dist, 1, 5))
45
51
 
46
- return SpeechMetrics(_pesq, csig, cbak, covl)
52
+ return SpeechMetrics(csig, cbak, covl)
47
53
 
48
54
 
49
55
  def _calc_weighted_spectral_slope_measure(
50
- hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
56
+ hypothesis: np.ndarray,
57
+ reference: np.ndarray,
58
+ sample_rate: int = SAMPLE_RATE,
51
59
  ) -> np.ndarray:
52
60
  from scipy.fftpack import fft
53
61
 
@@ -250,7 +258,9 @@ def _calc_weighted_spectral_slope_measure(
250
258
 
251
259
 
252
260
  def _calc_log_likelihood_ratio_measure(
253
- hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
261
+ hypothesis: np.ndarray,
262
+ reference: np.ndarray,
263
+ sample_rate: int = SAMPLE_RATE,
254
264
  ) -> np.ndarray:
255
265
  from scipy.linalg import toeplitz
256
266
 
@@ -296,7 +306,9 @@ def _calc_log_likelihood_ratio_measure(
296
306
 
297
307
 
298
308
  def _calc_snr(
299
- hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
309
+ hypothesis: np.ndarray,
310
+ reference: np.ndarray,
311
+ sample_rate: int = SAMPLE_RATE,
300
312
  ) -> tuple[float, np.ndarray]:
301
313
  # The lengths of the reference and hypothesis must be the same.
302
314
  reference_length = len(reference)
@@ -2,10 +2,10 @@
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
 
5
- from sonusai.mixture import GeneralizedIDs
6
- from sonusai.mixture import MixtureDatabase
7
- from sonusai.mixture import Predict
8
- from sonusai.mixture import Truth
5
+ from ..datatypes import GeneralizedIDs
6
+ from ..datatypes import Predict
7
+ from ..datatypes import Truth
8
+ from ..mixture.mixdb import MixtureDatabase
9
9
 
10
10
 
11
11
  def class_summary(
@@ -31,7 +31,7 @@ def class_summary(
31
31
  macro avg 0.85 0.83 0.84 0.05 0.96 3768
32
32
  micro-avgwo
33
33
  """
34
- from sonusai.metrics import one_hot
34
+ from ..metrics.one_hot import one_hot
35
35
 
36
36
  num_classes = truth_f.shape[1]
37
37
 
@@ -58,11 +58,11 @@ def class_summary(
58
58
  else:
59
59
  row_n = [f"Class {i}" for i in range(1, num_classes + 1)]
60
60
 
61
- df = pd.DataFrame(metrics[:, table_idx], columns=col_n, index=row_n) # pyright: ignore [reportArgumentType]
61
+ df = pd.DataFrame(metrics[:, table_idx], columns=col_n, index=row_n) # pyright: ignore [reportArgumentType]
62
62
 
63
63
  # [miPPV, miTPR, miF1, miFPR, miACC, miAP, miAUC, TPSUM]
64
64
  avg_row_n = ["Macro-avg", "Micro-avg", "Weighted-avg"]
65
- dfavg = pd.DataFrame(metavg, columns=col_n, index=avg_row_n) # pyright: ignore [reportArgumentType]
65
+ dfavg = pd.DataFrame(metavg, columns=col_n, index=avg_row_n) # pyright: ignore [reportArgumentType]
66
66
 
67
67
  # dfblank = pd.DataFrame([''])
68
68
  # pd.concat([df, dfblank, dfblank, dfavg])
@@ -2,10 +2,10 @@
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
 
5
- from sonusai.mixture import GeneralizedIDs
6
- from sonusai.mixture import MixtureDatabase
7
- from sonusai.mixture import Predict
8
- from sonusai.mixture import Truth
5
+ from ..datatypes import GeneralizedIDs
6
+ from ..datatypes import Predict
7
+ from ..datatypes import Truth
8
+ from ..mixture.mixdb import MixtureDatabase
9
9
 
10
10
 
11
11
  def confusion_matrix_summary(
@@ -30,7 +30,7 @@ def confusion_matrix_summary(
30
30
 
31
31
  Returns pandas dataframes of confusion matrix cmdf and normalized confusion matrix cmndf.
32
32
  """
33
- from sonusai.metrics import one_hot
33
+ from ..metrics.one_hot import one_hot
34
34
 
35
35
  num_classes = truth_f.shape[1]
36
36
  # TODO: re-work for modern mixdb API
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
 
3
- from sonusai.mixture.datatypes import Predict
4
- from sonusai.mixture.datatypes import Truth
3
+ from ..datatypes import Predict
4
+ from ..datatypes import Truth
5
5
 
6
6
 
7
7
  def one_hot(
@@ -53,8 +53,8 @@ def one_hot(
53
53
  from sklearn.metrics import precision_recall_fscore_support
54
54
  from sklearn.metrics import roc_auc_score
55
55
 
56
- from sonusai.utils import get_num_classes_from_predict
57
- from sonusai.utils import reshape_outputs
56
+ from ..utils.reshape import get_num_classes_from_predict
57
+ from ..utils.reshape import reshape_outputs
58
58
 
59
59
  if truth.shape != predict.shape:
60
60
  raise ValueError("truth and predict are not the same shape")
@@ -2,11 +2,11 @@
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
 
5
- from sonusai.mixture import GeneralizedIDs
6
- from sonusai.mixture import MixtureDatabase
7
- from sonusai.mixture import Predict
8
- from sonusai.mixture import Segsnr
9
- from sonusai.mixture import Truth
5
+ from ..datatypes import GeneralizedIDs
6
+ from ..datatypes import Predict
7
+ from ..datatypes import Segsnr
8
+ from ..datatypes import Truth
9
+ from ..mixture.mixdb import MixtureDatabase
10
10
 
11
11
 
12
12
  def snr_summary(
@@ -40,8 +40,8 @@ def snr_summary(
40
40
  """
41
41
  import warnings
42
42
 
43
- from sonusai.metrics import one_hot
44
- from sonusai.queries import get_mixids_from_snr
43
+ from ..metrics.one_hot import one_hot
44
+ from ..queries.queries import get_mixids_from_snr
45
45
 
46
46
  num_classes = truth_f.shape[1]
47
47
 
@@ -16,23 +16,9 @@ Inputs:
16
16
 
17
17
  """
18
18
 
19
- import signal
20
-
21
19
  import numpy as np
22
20
  import pandas as pd
23
21
 
24
-
25
- def signal_handler(_sig, _frame):
26
- import sys
27
-
28
- from sonusai import logger
29
-
30
- logger.info("Canceled due to keyboard interrupt")
31
- sys.exit(1)
32
-
33
-
34
- signal.signal(signal.SIGINT, signal_handler)
35
-
36
22
  DB_99 = np.power(10, 99 / 10)
37
23
  DB_N99 = np.power(10, -99 / 10)
38
24
 
@@ -49,8 +35,8 @@ def _process_mixture(
49
35
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
50
36
  from os.path import basename
51
37
 
38
+ from sonusai.constants import SAMPLE_RATE
52
39
  from sonusai.metrics import calc_wer
53
- from sonusai.mixture import SAMPLE_RATE
54
40
  from sonusai.mixture import MixtureDatabase
55
41
 
56
42
  mixdb = MixtureDatabase(location)
@@ -61,11 +47,11 @@ def _process_mixture(
61
47
  duration = samples / SAMPLE_RATE
62
48
  tf_frames = mixdb.mixture_transform_frames(m_id)
63
49
  feat_frames = mixdb.mixture_feature_frames(m_id)
64
- mxsnr = mixdb.mixture(m_id).snr
65
- ti = mixdb.mixture(m_id).targets[0].file_id
50
+ mxsnr = mixdb.mixture(m_id).noise.snr
51
+ ti = mixdb.mixture(m_id).sources["primary"].file_id
66
52
  ni = mixdb.mixture(m_id).noise.file_id
67
- t0file = basename(mixdb.target_file(ti).name)
68
- nfile = basename(mixdb.noise_file(ni).name)
53
+ t0file = basename(mixdb.source_file(ti).name)
54
+ nfile = basename(mixdb.source_file(ni).name)
69
55
 
70
56
  all_metrics = mixdb.mixture_metrics(m_id, all_metric_names)
71
57
 
@@ -104,10 +90,10 @@ def _process_mixture(
104
90
  def main() -> None:
105
91
  from docopt import docopt
106
92
 
107
- from sonusai import __version__ as sonusai_ver
108
- from sonusai.utils import trim_docstring
93
+ from . import __version__ as sai_version
94
+ from .utils.docstring import trim_docstring
109
95
 
110
- args = docopt(trim_docstring(__doc__), version=sonusai_ver, options_first=True)
96
+ args = docopt(trim_docstring(__doc__), version=sai_version, options_first=True)
111
97
 
112
98
  verbose = args["--verbose"]
113
99
  wrlist = args["--write-list"]
@@ -121,24 +107,22 @@ def main() -> None:
121
107
 
122
108
  import psutil
123
109
 
124
- from sonusai import create_file_handler
125
- from sonusai import initial_log_messages
126
- from sonusai import logger
127
- from sonusai import update_console_handler
128
- from sonusai.mixture import MixtureDatabase
129
- from sonusai.utils import create_timestamp
130
- from sonusai.utils import par_track
131
- from sonusai.utils import track
110
+ from . import create_file_handler
111
+ from . import initial_log_messages
112
+ from . import logger
113
+ from . import update_console_handler
114
+ from .mixture.mixdb import MixtureDatabase
115
+ from .utils.create_timestamp import create_timestamp
116
+ from .utils.parallel import par_track
117
+ from .utils.parallel import track
132
118
 
133
- try:
134
- mixdb = MixtureDatabase(location)
135
- print(f"Found SonusAI mixture database with {mixdb.num_mixtures} mixtures.")
136
- except:
137
- print(f"Could not open SonusAI mixture database in {location}, exiting ...")
138
- return
119
+ mixdb = MixtureDatabase(location)
120
+ print(f"Found SonusAI mixture database with {mixdb.num_mixtures} mixtures.")
139
121
 
140
122
  # Only check first and last mixture in order to save time
141
- metrics_present = mixdb.cached_metrics([0, mixdb.num_mixtures - 1])
123
+ metrics_present = mixdb.cached_metrics([0, mixdb.num_mixtures - 1]) # return pre-generated metrics in mixdb tree
124
+ if "mxsnr" in metrics_present:
125
+ metrics_present.remove("mxsnr")
142
126
 
143
127
  num_metrics_present = len(metrics_present)
144
128
  if num_metrics_present < 1:
@@ -188,8 +172,8 @@ def main() -> None:
188
172
  if len(metval) > 1:
189
173
  logger.warning(f"Mixid {mixids[0]} metric {metric} has a list with more than 1 element, using first.")
190
174
  metval = metval[0] # remove any list
191
- if isinstance(metval, float):
192
- logger.debug("Metric is scalar float, entering in summary table.")
175
+ if isinstance(metval, float | int):
176
+ logger.debug(f"Metric is scalar {type(metval)}, entering in summary table.")
193
177
  scalar_metric_names.append(metric)
194
178
  elif isinstance(metval, str):
195
179
  logger.debug("Metric is string, will summarize with word count.")
@@ -205,7 +189,7 @@ def main() -> None:
205
189
  else:
206
190
  logger.warning(f"Mixid {mixids[0]} metric {metric} is a vector of improper size, ignoring.")
207
191
 
208
- # Setup pandas table for summarizing scalar metrics
192
+ # Setup pandas table for summarizing scalar metrics, always include mxsnr first
209
193
  ptab_labels = [
210
194
  "mxsnr",
211
195
  *scalar_metric_names,
@@ -276,7 +260,7 @@ def main() -> None:
276
260
  ptab1.round(2).to_csv(wlcsv_name, **table_args)
277
261
  ptab1_sorted = ptab1.sort_values(by=["mxsnr", "t0file"])
278
262
 
279
- # Create metrics table except except -99 SNR
263
+ # Create metrics table except -99 SNR
280
264
  ptab1_nom99 = ptab1_sorted[ptab1_sorted.mxsnr != -99]
281
265
 
282
266
  # Create summary by SNR for all scalar metrics, taking mean
@@ -294,7 +278,7 @@ def main() -> None:
294
278
  nmixtot = mixdb.num_mixtures
295
279
  pd.DataFrame([["Timestamp", timestamp]]).to_csv(snrcsv_name, header=False, index=False)
296
280
  pd.DataFrame(['"Metrics avg over each SNR:"']).to_csv(snrcsv_name, **header_args)
297
- mtab_snr_summary.round(2).to_csv(snrcsv_name, index=False, **table_args)
281
+ mtab_snr_summary.round(2).T.to_csv(snrcsv_name, index=True, header=False, mode="a", encoding="utf-8")
298
282
  pd.DataFrame(["--"]).to_csv(snrcsv_name, header=False, index=False, mode="a")
299
283
  pd.DataFrame([f'"Metrics stats over {nmix} mixtures out of {nmixtot} total:"']).to_csv(snrcsv_name, **header_args)
300
284
  ptab1.describe().round(2).T.to_csv(snrcsv_name, index=True, **table_args)
@@ -304,12 +288,14 @@ def main() -> None:
304
288
  )
305
289
  ptab1_nom99.describe().round(2).T.to_csv(snrcsv_name, index=True, **table_args)
306
290
 
307
- # Write summary to .csv
291
+ # Write summary to text file
308
292
  snrtxt_name = str(join(location, "metric_summary_snr" + fsuffix + ".txt"))
309
293
  with open(snrtxt_name, "w") as f:
310
294
  print(f"Timestamp: {timestamp}", file=f)
311
295
  print("Metrics avg over each SNR:", file=f)
312
- print(mtab_snr_summary.round(2).to_string(float_format=lambda x: f"{x:.2f}", index=False), file=f)
296
+ print(
297
+ mtab_snr_summary.round(2).T.to_string(float_format=lambda x: f"{x:.2f}", index=True, header=False), file=f
298
+ )
313
299
  print("", file=f)
314
300
  print(f"Metrics stats over {len(mixids)} mixtures out of {mixdb.num_mixtures} total:", file=f)
315
301
  print(ptab1.describe().round(2).T.to_string(float_format=lambda x: f"{x:.2f}", index=True), file=f)
@@ -319,4 +305,11 @@ def main() -> None:
319
305
 
320
306
 
321
307
  if __name__ == "__main__":
322
- main()
308
+ from sonusai import exception_handler
309
+ from sonusai.utils import register_keyboard_interrupt
310
+
311
+ register_keyboard_interrupt()
312
+ try:
313
+ main()
314
+ except Exception as e:
315
+ exception_handler(e)
@@ -1,131 +1,31 @@
1
1
  # SonusAI mixture utilities
2
2
  # ruff: noqa: F401
3
3
 
4
- from .audio import get_duration
5
- from .audio import get_next_noise
6
- from .audio import get_num_samples
7
- from .audio import get_sample_rate
8
- from .audio import raw_read_audio
9
4
  from .audio import read_audio
10
- from .audio import read_ir
11
- from .audio import validate_input_file
12
- from .augmentation import apply_augmentation
13
- from .augmentation import apply_gain
14
- from .augmentation import apply_impulse_response
15
- from .augmentation import augmentation_from_rule
16
- from .augmentation import estimate_augmented_length_from_length
17
- from .augmentation import evaluate_random_rule
18
- from .augmentation import get_augmentation_indices_for_mixup
19
- from .augmentation import get_augmentation_rules
20
- from .augmentation import get_mixups
21
- from .augmentation import pad_audio_to_length
22
- from .class_count import get_class_count_from_mixids
23
- from .config import get_default_config
24
- from .config import get_impulse_response_files
25
- from .config import get_noise_files
26
- from .config import get_spectral_masks
27
- from .config import get_target_files
28
- from .config import get_truth_parameters
5
+ from .config import get_ir_files
6
+ from .config import get_source_files
29
7
  from .config import load_config
30
- from .config import raw_load_config
31
- from .config import update_config_from_file
32
- from .config import update_config_from_hierarchy
33
- from .config import validate_truth_configs
34
- from .constants import BIT_DEPTH
35
- from .constants import CHANNEL_COUNT
36
- from .constants import DEFAULT_CONFIG
37
- from .constants import DEFAULT_NOISE
38
- from .constants import DEFAULT_SPEECH
39
- from .constants import ENCODING
40
- from .constants import FLOAT_BYTES
41
- from .constants import MIXDB_VERSION
42
- from .constants import RAND_PATTERN
43
- from .constants import REQUIRED_CONFIGS
44
- from .constants import REQUIRED_TRUTH_CONFIGS
45
- from .constants import SAMPLE_BYTES
46
- from .constants import SAMPLE_RATE
47
- from .constants import VALID_AUGMENTATIONS
48
- from .constants import VALID_CONFIGS
49
- from .constants import VALID_NOISE_MIX_MODES
50
- from .data_io import clear_cached_data
51
8
  from .data_io import read_cached_data
52
9
  from .data_io import write_cached_data
53
- from .datatypes import AudioF
54
- from .datatypes import AudioStatsMetrics
55
- from .datatypes import AudioT
56
- from .datatypes import Augmentation
57
- from .datatypes import AugmentationEffects
58
- from .datatypes import AugmentationRule
59
- from .datatypes import AugmentationRuleEffects
60
- from .datatypes import AugmentedTarget
61
- from .datatypes import ClassCount
62
- from .datatypes import EnergyF
63
- from .datatypes import EnergyT
64
- from .datatypes import Feature
65
- from .datatypes import FeatureGeneratorConfig
66
- from .datatypes import FeatureGeneratorInfo
67
- from .datatypes import GeneralizedIDs
68
- from .datatypes import GenFTData
69
- from .datatypes import GenMixData
70
- from .datatypes import ImpulseResponseData
71
- from .datatypes import ImpulseResponseFile
72
- from .datatypes import MetricDoc
73
- from .datatypes import MetricDocs
74
- from .datatypes import Mixture
75
- from .datatypes import MixtureDatabaseConfig
76
- from .datatypes import NoiseFile
77
- from .datatypes import Predict
78
- from .datatypes import Segsnr
79
- from .datatypes import SnrFMetrics
80
- from .datatypes import SpectralMask
81
- from .datatypes import SpeechMetadata
82
- from .datatypes import SpeechMetrics
83
- from .datatypes import TargetFile
84
- from .datatypes import TransformConfig
85
- from .datatypes import Truth
86
- from .datatypes import TruthConfig
87
- from .datatypes import TruthConfigs
88
- from .datatypes import TruthDict
89
- from .datatypes import TruthParameter
90
- from .datatypes import UniversalSNR
10
+ from .effects import get_effect_rules
91
11
  from .feature import get_audio_from_feature
92
12
  from .feature import get_feature_from_audio
93
13
  from .generation import generate_mixtures
94
- from .generation import get_all_snrs_from_config
95
14
  from .generation import initialize_db
96
15
  from .generation import populate_class_label_table
97
16
  from .generation import populate_class_weights_threshold_table
98
17
  from .generation import populate_impulse_response_file_table
99
18
  from .generation import populate_mixture_table
100
- from .generation import populate_noise_file_table
19
+ from .generation import populate_source_file_table
101
20
  from .generation import populate_spectral_mask_table
102
- from .generation import populate_target_file_table
103
21
  from .generation import populate_top_table
104
22
  from .generation import populate_truth_parameters_table
105
23
  from .generation import update_mixid_width
106
24
  from .generation import update_mixture
107
- from .helpers import augmented_noise_samples
108
- from .helpers import augmented_target_samples
109
25
  from .helpers import check_audio_files_exist
110
26
  from .helpers import forward_transform
111
- from .helpers import frames_from_samples
112
- from .helpers import get_audio_from_transform
113
- from .helpers import get_transform_from_audio
114
27
  from .helpers import inverse_transform
115
- from .helpers import mixture_metadata
116
28
  from .helpers import write_mixture_metadata
117
- from .ir_delay import get_impulse_response_delay
118
29
  from .log_duration_and_sizes import log_duration_and_sizes
119
30
  from .mixdb import MixtureDatabase
120
31
  from .mixdb import db_file
121
- from .spectral_mask import apply_spectral_mask
122
- from .target_class_balancing import balance_targets
123
- from .targets import get_augmented_target_ids_by_class
124
- from .targets import get_augmented_target_ids_for_mixup
125
- from .targets import get_augmented_targets
126
- from .targets import get_target_augmentations_for_mixup
127
- from .tokenized_shell_vars import tokenized_expand
128
- from .tokenized_shell_vars import tokenized_replace
129
- from .truth import get_truth_indices_for_mixid
130
- from .truth import truth_function
131
- from .truth import truth_stride_reduction
sonusai/mixture/audio.py CHANGED
@@ -1,8 +1,7 @@
1
1
  from functools import lru_cache
2
2
  from pathlib import Path
3
3
 
4
- from sonusai.mixture.datatypes import AudioT
5
- from sonusai.mixture.datatypes import ImpulseResponseData
4
+ from ..datatypes import AudioT
6
5
 
7
6
 
8
7
  def get_next_noise(audio: AudioT, offset: int, length: int) -> AudioT:
@@ -24,7 +23,7 @@ def get_duration(audio: AudioT) -> float:
24
23
  :param audio: Time domain data [samples]
25
24
  :return: Duration of audio in seconds
26
25
  """
27
- from .constants import SAMPLE_RATE
26
+ from ..constants import SAMPLE_RATE
28
27
 
29
28
  return len(audio) / SAMPLE_RATE
30
29
 
@@ -66,7 +65,7 @@ def _get_sample_rate(name: str | Path) -> int:
66
65
  import soundfile
67
66
  from pydub import AudioSegment
68
67
 
69
- from .tokenized_shell_vars import tokenized_expand
68
+ from ..utils.tokenized_shell_vars import tokenized_expand
70
69
 
71
70
  expanded_name, _ = tokenized_expand(name)
72
71
 
@@ -90,7 +89,7 @@ def raw_read_audio(name: str | Path) -> tuple[AudioT, int]:
90
89
  import soundfile
91
90
  from pydub import AudioSegment
92
91
 
93
- from .tokenized_shell_vars import tokenized_expand
92
+ from ..utils.tokenized_shell_vars import tokenized_expand
94
93
 
95
94
  expanded_name, _ = tokenized_expand(name)
96
95
 
@@ -135,40 +134,12 @@ def _read_audio(name: str | Path) -> AudioT:
135
134
  :param name: File name
136
135
  :return: Array of time domain audio data
137
136
  """
138
- import librosa
137
+ from ..constants import SAMPLE_RATE
138
+ from .resample import resample
139
139
 
140
- from .constants import SAMPLE_RATE
141
-
142
- out, sample_rate = raw_read_audio(name)
143
- out = librosa.resample(out, orig_sr=sample_rate, target_sr=SAMPLE_RATE, res_type="soxr_hq")
144
-
145
- return out
146
-
147
-
148
- def read_ir(name: str | Path, delay: int, use_cache: bool = True) -> ImpulseResponseData:
149
- """Read impulse response data
150
-
151
- :param name: File name
152
- :param delay: Delay in samples
153
- :param use_cache: If true, use LRU caching
154
- :return: ImpulseResponseData object
155
- """
156
- if use_cache:
157
- return _read_ir(name, delay)
158
- return _read_ir.__wrapped__(name, delay)
159
-
160
-
161
- @lru_cache
162
- def _read_ir(name: str | Path, delay: int) -> ImpulseResponseData:
163
- """Read impulse response data using soundfile
164
-
165
- :param name: File name
166
- :param delay: Delay in samples
167
- :return: ImpulseResponseData object
168
- """
169
140
  out, sample_rate = raw_read_audio(name)
170
141
 
171
- return ImpulseResponseData(data=out, sample_rate=sample_rate, delay=delay)
142
+ return resample(out, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
172
143
 
173
144
 
174
145
  def get_num_samples(name: str | Path, use_cache: bool = True) -> int:
@@ -195,8 +166,8 @@ def _get_num_samples(name: str | Path) -> int:
195
166
  import soundfile
196
167
  from pydub import AudioSegment
197
168
 
198
- from .constants import SAMPLE_RATE
199
- from .tokenized_shell_vars import tokenized_expand
169
+ from ..constants import SAMPLE_RATE
170
+ from ..utils.tokenized_shell_vars import tokenized_expand
200
171
 
201
172
  expanded_name, _ = tokenized_expand(name)
202
173
 
@@ -209,7 +180,7 @@ def _get_num_samples(name: str | Path) -> int:
209
180
  samples = sound.frame_count()
210
181
  sample_rate = sound.frame_rate
211
182
  else:
212
- info = soundfile.info(name)
183
+ info = soundfile.info(expanded_name)
213
184
  samples = info.frames
214
185
  sample_rate = info.samplerate
215
186