PyPI - sonusai - Versions diffs - 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

sonusai/__init__.py +16 -3
sonusai/audiofe.py +241 -77
sonusai/calc_metric_spenh.py +71 -73
sonusai/config/__init__.py +3 -0
sonusai/config/config.py +61 -0
sonusai/config/config.yml +20 -0
sonusai/config/constants.py +8 -0
sonusai/constants.py +11 -0
sonusai/data/genmixdb.yml +21 -36
sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
sonusai/deprecated/plot.py +4 -5
sonusai/doc/doc.py +4 -4
sonusai/doc.py +11 -4
sonusai/genft.py +43 -45
sonusai/genmetrics.py +25 -19
sonusai/genmix.py +54 -82
sonusai/genmixdb.py +88 -264
sonusai/ir_metric.py +30 -34
sonusai/lsdb.py +41 -48
sonusai/main.py +15 -22
sonusai/metrics/calc_audio_stats.py +4 -293
sonusai/metrics/calc_class_weights.py +4 -4
sonusai/metrics/calc_optimal_thresholds.py +8 -5
sonusai/metrics/calc_pesq.py +2 -2
sonusai/metrics/calc_segsnr_f.py +4 -4
sonusai/metrics/calc_speech.py +25 -13
sonusai/metrics/class_summary.py +7 -7
sonusai/metrics/confusion_matrix_summary.py +5 -5
sonusai/metrics/one_hot.py +4 -4
sonusai/metrics/snr_summary.py +7 -7
sonusai/metrics_summary.py +38 -45
sonusai/mixture/__init__.py +4 -104
sonusai/mixture/audio.py +10 -39
sonusai/mixture/class_balancing.py +103 -0
sonusai/mixture/config.py +251 -271
sonusai/mixture/constants.py +35 -39
sonusai/mixture/data_io.py +25 -36
sonusai/mixture/db_datatypes.py +58 -22
sonusai/mixture/effects.py +386 -0
sonusai/mixture/feature.py +7 -11
sonusai/mixture/generation.py +478 -628
sonusai/mixture/helpers.py +82 -184
sonusai/mixture/ir_delay.py +3 -4
sonusai/mixture/ir_effects.py +77 -0
sonusai/mixture/log_duration_and_sizes.py +6 -12
sonusai/mixture/mixdb.py +910 -729
sonusai/mixture/pad_audio.py +35 -0
sonusai/mixture/resample.py +7 -0
sonusai/mixture/sox_effects.py +195 -0
sonusai/mixture/sox_help.py +650 -0
sonusai/mixture/spectral_mask.py +2 -2
sonusai/mixture/truth.py +17 -15
sonusai/mixture/truth_functions/crm.py +12 -12
sonusai/mixture/truth_functions/energy.py +22 -22
sonusai/mixture/truth_functions/file.py +5 -5
sonusai/mixture/truth_functions/metadata.py +4 -4
sonusai/mixture/truth_functions/metrics.py +4 -4
sonusai/mixture/truth_functions/phoneme.py +3 -3
sonusai/mixture/truth_functions/sed.py +11 -13
sonusai/mixture/truth_functions/target.py +10 -10
sonusai/mkwav.py +26 -29
sonusai/onnx_predict.py +240 -88
sonusai/queries/__init__.py +2 -2
sonusai/queries/queries.py +38 -34
sonusai/speech/librispeech.py +1 -1
sonusai/speech/mcgill.py +1 -1
sonusai/speech/timit.py +2 -2
sonusai/summarize_metric_spenh.py +10 -17
sonusai/utils/__init__.py +7 -1
sonusai/utils/asl_p56.py +2 -2
sonusai/utils/asr.py +2 -2
sonusai/utils/asr_functions/aaware_whisper.py +4 -5
sonusai/utils/choice.py +31 -0
sonusai/utils/compress.py +1 -1
sonusai/utils/dataclass_from_dict.py +19 -1
sonusai/utils/energy_f.py +3 -3
sonusai/utils/evaluate_random_rule.py +15 -0
sonusai/utils/keyboard_interrupt.py +12 -0
sonusai/utils/onnx_utils.py +3 -17
sonusai/utils/print_mixture_details.py +21 -19
sonusai/utils/{temp_seed.py → rand.py} +3 -3
sonusai/utils/read_predict_data.py +2 -2
sonusai/utils/reshape.py +3 -3
sonusai/utils/stratified_shuffle_split.py +3 -3
sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
sonusai/utils/write_audio.py +2 -2
sonusai/vars.py +11 -4
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
sonusai-1.0.2.dist-info/RECORD +138 -0
sonusai/mixture/augmentation.py +0 -444
sonusai/mixture/class_count.py +0 -15
sonusai/mixture/eq_rule_is_valid.py +0 -45
sonusai/mixture/target_class_balancing.py +0 -107
sonusai/mixture/targets.py +0 -175
sonusai-0.20.3.dist-info/RECORD +0 -128
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0

sonusai/calc_metric_spenh.py CHANGED Viewed

@@ -67,34 +67,23 @@ Inputs:
 """
-import signal
+from typing import Any
 import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-from sonusai.mixture import AudioF
-from sonusai.mixture import AudioT
-from sonusai.mixture import Feature
+from sonusai.datatypes import AudioF
+from sonusai.datatypes import AudioT
+from sonusai.datatypes import Feature
+from sonusai.datatypes import Predict
 from sonusai.mixture import MixtureDatabase
-from sonusai.mixture import Predict
 DB_99 = np.power(10, 99 / 10)
 DB_N99 = np.power(10, -99 / 10)
-def signal_handler(_sig, _frame):
-    import sys
-    from sonusai import logger
-    logger.info("Canceled due to keyboard interrupt")
-    sys.exit(1)
-signal.signal(signal.SIGINT, signal_handler)
 matplotlib.use("SVG")
@@ -192,8 +181,8 @@ def plot_mixpred(
     feature: Feature | None = None,
     predict: Predict | None = None,
     tp_title: str = "",
-) -> plt.Figure:  # pyright: ignore [reportPrivateImportUsage]
-    from sonusai.mixture import SAMPLE_RATE
+) -> tuple[plt.Figure, Any]:  # pyright: ignore [reportPrivateImportUsage]
+    from sonusai.constants import SAMPLE_RATE
     num_plots = 2
     if feature is not None:
@@ -229,7 +218,7 @@ def plot_mixpred(
         ax[p].set_title("Predict " + tp_title)
         plt.colorbar(im, location="bottom")
-    return fig
+    return fig, ax
 def plot_pdb_predict_truth(
@@ -291,7 +280,7 @@ def plot_e_predict_truth(
     truth_wav: np.ndarray | None = None,
     metric: np.ndarray | None = None,
     tp_title: str = "",
-) -> plt.Figure:  # pyright: ignore [reportPrivateImportUsage]
+) -> tuple[plt.Figure, Any]:  # pyright: ignore [reportPrivateImportUsage]
     """Plot predict spectrogram and waveform and optionally truth and a metric)"""
     num_plots = 2
     if truth_f is not None:
@@ -337,18 +326,19 @@ def plot_e_predict_truth(
         ax[p].set_xlim(x_axis[0], x_axis[-1])
         ax[p].set_ylim([-0.01, np.max(metric1) + 0.01])
         if metric.ndim > 1 and metric.shape[1] > 1:
+            p += 1
             metr2 = metric[:, 1]
-            ax2 = ax[p].twinx()
+            ax = np.append(ax, np.array(ax[p - 1].twinx()))
             color2 = "blue"
-            ax2.plot(x_axis, metr2, color=color2, label="phase dist (deg)")
+            ax[p].plot(x_axis, metr2, color=color2, label="phase dist (deg)")
             # ax2.set_ylim([-180.0, +180.0])
             if np.max(metr2) - np.min(metr2) > 0.1:
-                ax2.set_ylim([np.min(metr2), np.max(metr2)])
-            ax2.set_ylabel("phase dist (deg)", color=color2)
-            ax2.tick_params(axis="y", labelcolor=color2)
+                ax[p].set_ylim([np.min(metr2), np.max(metr2)])
+            ax[p].set_ylabel("phase dist (deg)", color=color2)
+            ax[p].tick_params(axis="y", labelcolor=color2)
             # ax[p].set_title('SNR and SNR mse (mean over freq. db)')
-    return fig
+    return fig, ax
 def _process_mixture(
@@ -368,12 +358,13 @@ def _process_mixture(
     from os.path import splitext
     import h5py
-    import mgzip
+    import pgzip
     from matplotlib.backends.backend_pdf import PdfPages
     from pystoi import stoi
     from sonusai import logger
     from sonusai.metrics import calc_pcm
+    from sonusai.metrics import calc_pesq
     from sonusai.metrics import calc_phase_distance
     from sonusai.metrics import calc_speech
     from sonusai.metrics import calc_wer
@@ -422,16 +413,16 @@ def _process_mixture(
             predict = stack_complex(predict)
     # 2) Collect true target, noise, mixture data, trim to predict size if needed
-    tmp = mixdb.mixture_targets(m_id)  # time-dom augmented targets is list of pre-IR and pre-specaugment targets
-    target_f = mixdb.mixture_targets_f(m_id, targets=tmp)[0]
-    target = tmp[0]
+    tmp = mixdb.mixture_sources(m_id)  # time-dom augmented targets is list of pre-IR and pre-specaugment targets
+    target_f = mixdb.mixture_sources_f(m_id, sources=tmp)["primary"]
+    target = tmp["primary"]
     mixture = mixdb.mixture_mixture(m_id)  # note: gives full reverberated/distorted target, but no specaugment
     # noise_wo_dist = mixdb.mixture_noise(mixid)            # noise without specaugment and distortion
     # noise_wo_dist_f = mixdb.mixture_noise_f(mixid, noise=noise_wo_dist)
     noise = mixture - target  # has time-domain distortion (ir,etc.) but does not have specaugment
     # noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
     # note: uses pre-IR, pre-specaug audio
-    segsnr_f = mixdb.mixture_metrics(m_id, ["ssnr"])["ssnr"][0]
+    segsnr_f = mixdb.mixture_metrics(m_id, ["ssnr"])["ssnr"]  # Why [0] removed?
     mixture_f = mixdb.mixture_mixture_f(m_id, mixture=mixture)
     noise_f = mixture_f - target_f  # true noise in freq domain includes specaugment and time-domain ir,distortions
     # segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
@@ -446,7 +437,7 @@ def _process_mixture(
     # gen feature, truth - note feature only used for plots
     # TODO: parse truth_f for different formats
     feature, truth_all = mixdb.mixture_ft(m_id, mixture_f=mixture_f)
-    truth_f = truth_all[target_f_key]
+    truth_f = truth_all["primary"][target_f_key]
     if truth_f.ndim > 2:  # note this may not be needed anymore as all target_f truth is 3 dims
         if truth_f.shape[1] != 1:
             logger.info("Error: target_f truth has stride > 1, exiting.")
@@ -488,7 +479,7 @@ def _process_mixture(
         predict = truth_f  # substitute truth for the prediction (for test/debug)
         predict_complex = unstack_complex(predict)  # unstack
         # if feature has compressed mag and truth does not, compress it
-        if mixdb.feature[0:1] == "h" and not mixdb.truth_configs[first_key(mixdb.truth_configs)].function.startswith(
+        if mixdb.feature[0:1] == "h" and not first_key(mixdb.category_truth_configs("primary")).startswith(
             "targetcmpr"
         ):
             predict_complex = power_compress(predict_complex)  # from uncompressed truth
@@ -535,23 +526,24 @@ def _process_mixture(
     # logger.debug(f'wsdr ccoefs for mixid {mixid} = {wsdr_cc}.')
     # Speech intelligibility measure - PESQ
-    if int(mixdb.mixture(m_id).snr) > -99:
+    if int(mixdb.mixture(m_id).noise.snr) > -99:
         # len = target_est_wav.shape[0]
-        pesq_speech, csig_tg, cbak_tg, covl_tg = calc_speech(target_est_wav, target_fi)
+        pesq_speech = calc_pesq(target_est_wav, target_fi)
+        csig_tg, cbak_tg, covl_tg = calc_speech(target_est_wav, target_fi, pesq=pesq_speech)
         metrics = mixdb.mixture_metrics(m_id, ["mxpesq", "mxcsig", "mxcbak", "mxcovl"])
-        pesq_mixture = metrics["mxpesq"]
-        csig_mx = metrics["mxcsig"]
-        cbak_mx = metrics["mxcbak"]
-        covl_mx = metrics["mxcovl"]
+        pesq_mx = metrics["mxpesq"][0] if isinstance(metrics["mxpesq"], list) else metrics["mxpesq"]
+        csig_mx = metrics["mxcsig"][0] if isinstance(metrics["mxcsig"], list) else metrics["mxcsig"]
+        cbak_mx = metrics["mxcbak"][0] if isinstance(metrics["mxcbak"], list) else metrics["mxcbak"]
+        covl_mx = metrics["mxcovl"][0] if isinstance(metrics["mxcovl"], list) else metrics["mxcovl"]
         # pesq_speech_tst = calc_pesq(hypothesis=target_est_wav, reference=target)
         # pesq_mixture_tst = calc_pesq(hypothesis=mixture, reference=target)
         # pesq improvement
-        pesq_impr = pesq_speech - pesq_mixture
+        pesq_impr = pesq_speech - pesq_mx
         # pesq improvement %
-        pesq_impr_pc = pesq_impr / (pesq_mixture + np.finfo(np.float32).eps) * 100
+        pesq_impr_pc = pesq_impr / (pesq_mx + np.finfo(np.float32).eps) * 100
     else:
         pesq_speech = 0
-        pesq_mixture = 0
+        pesq_mx = 0
         pesq_impr_pc = np.float32(0)
         csig_mx = 0
         csig_tg = 0
@@ -565,14 +557,14 @@ def _process_mixture(
     asr_mx = None
     asr_tge = None
     # asr_engines = list(mixdb.asr_configs.keys())
-    if asr_method is not None and mixdb.mixture(m_id).snr >= -96:  # noise only, ignore/reset target ASR
+    if asr_method is not None and mixdb.mixture(m_id).noise.snr >= -96:  # noise only, ignore/reset target ASR
         asr_mx_name = f"mxasr.{asr_method}"
         wer_mx_name = f"mxwer.{asr_method}"
         asr_tt_name = f"tasr.{asr_method}"
         metrics = mixdb.mixture_metrics(m_id, [asr_mx_name, wer_mx_name, asr_tt_name])
-        asr_mx = metrics[asr_mx_name][0]
-        wer_mx = metrics[wer_mx_name][0]
-        asr_tt = metrics[asr_tt_name][0]
+        asr_mx = metrics[asr_mx_name][0] if isinstance(metrics[asr_mx_name], list) else metrics[asr_mx_name]
+        wer_mx = metrics[wer_mx_name][0] if isinstance(metrics[wer_mx_name], list) else metrics[wer_mx_name]
+        asr_tt = metrics[asr_tt_name][0] if isinstance(metrics[asr_tt_name], list) else metrics[asr_tt_name]
         if asr_tt:
             noiseadd = None  # TBD add as switch, default -30
@@ -628,11 +620,11 @@ def _process_mixture(
         "SPFILE",
         "NFILE",
     ]
-    ti = mixdb.mixture(m_id).targets[0].file_id
+    ti = mixdb.mixture(m_id).sources["primary"].file_id
     ni = mixdb.mixture(m_id).noise.file_id
     metr1 = [
-        mixdb.mixture(m_id).snr,
-        pesq_mixture,
+        mixdb.mixture(m_id).noise.snr,
+        pesq_mx,
         pesq_speech,
         pesq_impr_pc,
         wer_mx,
@@ -650,8 +642,8 @@ def _process_mixture(
         cbak_tg,
         covl_mx,
         covl_tg,
-        basename(mixdb.target_file(ti).name),
-        basename(mixdb.noise_file(ni).name),
+        basename(mixdb.source_file(ti).name),
+        basename(mixdb.source_file(ni).name),
     ]
     mtab1 = pd.DataFrame([metr1], columns=mtable1_col, index=[m_id])
@@ -669,7 +661,7 @@ def _process_mixture(
     )
     dat1row = metr2.loc[["mean", "min", "50%", "max", "std"], :].T.stack().to_numpy().reshape((1, -1))
     mtab2 = pd.DataFrame(dat1row, index=[m_id], columns=new_labels)
-    mtab2.insert(0, "MXSNR", mixdb.mixture(m_id).snr, False)  # add MXSNR as the first metric column
+    mtab2.insert(0, "MXSNR", mixdb.mixture(m_id).noise.snr, False)  # add MXSNR as the first metric column
     all_metrics_table_1 = mtab1  # return to be collected by process
     all_metrics_table_2 = mtab2  # return to be collected by process
@@ -686,8 +678,8 @@ def _process_mixture(
         print(f"Extraction statistics over {mixture_f.shape[0]} frames:", file=f)
         print(metr2.round(2).to_string(float_format=lambda x: f"{x:.2f}"), file=f)
         print("", file=f)
-        print(f"Target path: {mixdb.target_file(ti).name}", file=f)
-        print(f"Noise path: {mixdb.noise_file(ni).name}", file=f)
+        print(f"Target path: {mixdb.source_file(ti).name}", file=f)
+        print(f"Noise path: {mixdb.source_file(ni).name}", file=f)
         if asr_method != "none":
             print(f"ASR method: {asr_method}", file=f)
             print(f"ASR truth:  {asr_tt}", file=f)
@@ -746,7 +738,7 @@ def _process_mixture(
             #     tfunc_name = tfunc_name + ' (db)'
             mixspec = 20 * np.log10(abs(mixture_f) + np.finfo(np.float32).eps)
-            fig_obj = plot_mixpred(
+            fig, ax = plot_mixpred(
                 mixture=mixture,
                 mixture_f=mixspec,
                 target=target,
@@ -754,9 +746,8 @@ def _process_mixture(
                 predict=predplot,
                 tp_title=tfunc_name,
             )
-            pdf.savefig(fig_obj)
-            with mgzip.open(base_name + "_metric_spenh_fig1.mfigz", "wb") as f:
-                pickle.dump(fig_obj, f)
+            pdf.savefig(fig)
+            pickle.dump((fig, ax), pgzip.open(base_name + "_metric_spenh_fig1.pkl.gz", "wb"))
             # ----- page 2, plot unmapped predict, opt truth reconstructed and line plots of mean-over-f
             # pdf.savefig(plot_pdb_predtruth(predict=pred_snr_f, tp_title='predict snr_f (db)'))
@@ -765,7 +756,7 @@ def _process_mixture(
             tg_spec = 20 * np.log10(abs(target_f) + np.finfo(np.float32).eps)
             tg_est_spec = 20 * np.log10(abs(predict_complex) + np.finfo(np.float32).eps)
             # n_spec = np.reshape(n_spec,(n_spec.shape[0] * n_spec.shape[1], n_spec.shape[2]))
-            fig_obj = plot_e_predict_truth(
+            fig, ax = plot_e_predict_truth(
                 predict=tg_est_spec,
                 predict_wav=target_est_wav,
                 truth_f=tg_spec,
@@ -773,14 +764,13 @@ def _process_mixture(
                 metric=np.vstack((lerr_tg_frame, phd_frame)).T,
                 tp_title="speech estimate",
             )
-            pdf.savefig(fig_obj)
-            with mgzip.open(base_name + "_metric_spenh_fig2.mfigz", "wb") as f:
-                pickle.dump(fig_obj, f)
+            pdf.savefig(fig)
+            pickle.dump((fig, ax), pgzip.open(base_name + "_metric_spenh_fig2.pkl.gz", "wb"))
             # page 4 noise extraction
             n_spec = 20 * np.log10(abs(noise_f) + np.finfo(np.float32).eps)
             n_est_spec = 20 * np.log10(abs(noise_est_complex) + np.finfo(np.float32).eps)
-            fig_obj = plot_e_predict_truth(
+            fig, ax = plot_e_predict_truth(
                 predict=n_est_spec,
                 predict_wav=noise_est_wav,
                 truth_f=n_spec,
@@ -788,9 +778,8 @@ def _process_mixture(
                 metric=lerr_n_frame,
                 tp_title="noise estimate",
             )
-            pdf.savefig(fig_obj)
-            with mgzip.open(base_name + "_metric_spenh_fig4.mfigz", "wb") as f:
-                pickle.dump(fig_obj, f)
+            pdf.savefig(fig)
+            pickle.dump((fig, ax), pgzip.open(base_name + "_metric_spenh_fig4.pkl.gz", "wb"))
             # Plot error waveforms
             # tg_err_wav = target_fi - target_est_wav
@@ -871,12 +860,14 @@ def main():
     )
     # speech enhancement metrics and audio truth requires target_f truth type, check it is present
     target_f_key = None
-    logger.info(f"mixdb has {len(mixdb.truth_configs)} truth types defined, checking that target_f type is present.")
-    for key in mixdb.truth_configs:
-        if mixdb.truth_configs[key].function == "target_f":
+    logger.info(
+        f"mixdb has {len(mixdb.category_truth_configs('primary'))} truth types defined for primary, checking that target_f type is present."
+    )
+    for key in mixdb.category_truth_configs("primary"):
+        if mixdb.category_truth_configs("primary")[key] == "target_f":
             target_f_key = key
     if target_f_key is None:
-        logger.error("mixdb does not have target_f truth define, required for speech enhancement metrics, exiting.")
+        logger.error("mixdb does not have target_f truth defined, required for speech enhancement metrics, exiting.")
         raise SystemExit(1)
     logger.info(f"Only running specified subset of {len(mixids)} mixtures")
@@ -924,8 +915,8 @@ def main():
         no_par = True
         num_cpus = None
     else:
-        no_par = True
-        num_cpus = None
+        no_par = False
+        num_cpus = use_cpu
     all_metrics_tables = par_track(
         partial(
@@ -1101,7 +1092,14 @@ def main():
 if __name__ == "__main__":
-    main()
+    from sonusai import exception_handler
+    from sonusai.utils import register_keyboard_interrupt
+    register_keyboard_interrupt()
+    try:
+        main()
+    except Exception as e:
+        exception_handler(e)
 # if asr_method == 'none':
 #     fnb = 'metric_spenh_'

sonusai/config/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# ruff: noqa: F401
+from .config import load_config

sonusai/config/config.py ADDED Viewed

@@ -0,0 +1,61 @@
+def _load_yaml(name: str) -> dict:
+    """Load YAML file
+    :param name: File name
+    :return: Dictionary of config data
+    """
+    import yaml
+    with open(file=name) as f:
+        config = yaml.safe_load(f)
+    return config
+def _default_config() -> dict:
+    """Load default SonusAI config
+    :return: Dictionary of default config data
+    """
+    from .constants import DEFAULT_CONFIG
+    try:
+        return _load_yaml(DEFAULT_CONFIG)
+    except Exception as e:
+        raise OSError(f"Error loading default config: {e}") from e
+def _update_config_from_file(filename: str, given_config: dict) -> dict:
+    """Update the given config with the config in the specified YAML file
+    :param filename: File name
+    :param given_config: Config dictionary to update
+    :return: Updated config dictionary
+    """
+    from copy import deepcopy
+    updated_config = deepcopy(given_config)
+    try:
+        file_config = _load_yaml(filename)
+    except Exception as e:
+        raise OSError(f"Error loading config from {filename}: {e}") from e
+    # Use default config as base and overwrite with given config keys as found
+    if file_config:
+        for key in updated_config:
+            if key in file_config:
+                updated_config[key] = file_config[key]
+    return updated_config
+def load_config(name: str) -> dict:
+    """Load SonusAI default config and update with given location (performing SonusAI variable substitution)
+    :param name: Directory containing mixture database
+    :return: Dictionary of config data
+    """
+    from os.path import join
+    return _update_config_from_file(filename=join(name, "config.yml"), given_config=_default_config())

sonusai/config/config.yml ADDED Viewed

@@ -0,0 +1,20 @@
+# Default configuration for sonusai
+# The values in this file are the defaults used if they are not specified in a
+# local config.
+feature: ""
+target_level_type: default
+class_indices: 1
+num_classes: 1
+class_labels: [ ]
+seed: 0
+class_weights_threshold: 0.5
+asr_configs: { }

sonusai/config/constants.py ADDED Viewed

@@ -0,0 +1,8 @@
+from importlib.resources import as_file
+from importlib.resources import files
+REQUIRED_TRUTH_CONFIG_FIELDS = ["function", "stride_reduction"]
+REQUIRED_ASR_CONFIG_FIELDS = ["engine"]
+with as_file(files("sonusai.config").joinpath("config.yml")) as path:
+    DEFAULT_CONFIG = str(path)

sonusai/constants.py ADDED Viewed

@@ -0,0 +1,11 @@
+from importlib.resources import as_file
+from importlib.resources import files
+SAMPLE_RATE = 16000
+CHANNEL_COUNT = 1
+BIT_DEPTH = 32
+SAMPLE_BYTES = BIT_DEPTH // 8
+FLOAT_BYTES = 4
+with as_file(files("sonusai.data").joinpath("whitenoise.wav")) as path:
+    DEFAULT_NOISE = str(path)

sonusai/data/genmixdb.yml CHANGED Viewed

@@ -3,54 +3,41 @@
 # The values in this file are the defaults used if they are not specified in a
 # local config.
-feature: ""
-target_level_type: default
-class_indices: 1
+seed: 0
-targets: [ ]
+feature: ""
 num_classes: 1
-class_labels: [ ]
+asr_configs: { }
-seed: 0
+level_type: default
+class_indices: 1
+class_labels: [ ]
 class_weights_threshold: 0.5
-truth_configs: { }
-asr_manifest: [ ]
-target_augmentations:
-  - pre:
-class_balancing_augmentation:
-  normalize: -3.5
-  pitch: "rand(-300, 300)"
-  tempo: "rand(0.8, 1.2)"
-  eq1: [ "rand(50, 250)", "rand(0.6, 1.0)", "rand(-6, 6)" ]
-  eq2: [ "rand(250, 1200)", "rand(0.6, 1.0)", "rand(-6, 6)" ]
-  eq3: [ "rand(1200, 6000)", "rand(0.6, 1.0)", "rand(-6, 6)" ]
+class_balancing_effect:
+  - norm -3.5
+  - pitch sai_rand(-300, 300)
+  - tempo -s sai_rand(0.8, 1.2)
+  - equalizer sai_rand(50, 250) sai_rand(0.2, 2.0) sai_rand(-6, 6)
+  - equalizer sai_rand(250, 1200) sai_rand(0.2, 2.0) sai_rand(-6, 6)
+  - equalizer sai_rand(1200, 6000) sai_rand(0.2, 2.0) sai_rand(-6, 6)
 class_balancing: false
-noises:
-  - "${default_noise}"
-noise_augmentations:
-  - pre:
-      normalize: -3.5
-snrs:
-  - 99
+impulse_responses: [ ]
-random_snrs: [ ]
+sources:
+  primary:
+    files: [ ]
+  noise:
+    files: [ ]
-noise_mix_mode: exhaustive
+summed_source_effects: [ ]
-impulse_responses: [ ]
+mixture_effects: [ ]
 spectral_masks:
   - f_max_width: 27
@@ -58,5 +45,3 @@ spectral_masks:
     t_max_width: 100
     t_num: 0
     t_max_percent: 100
-asr_configs: { }

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl