PyPI - sonusai - Versions diffs - 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl - Mend

sonusai 0.18.8-py3-none-any.whl → 0.19.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

sonusai/__init__.py +20 -29
sonusai/aawscd_probwrite.py +18 -18
sonusai/audiofe.py +93 -80
sonusai/calc_metric_spenh.py +395 -321
sonusai/data/genmixdb.yml +5 -11
sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
sonusai/{plot.py → deprecated/plot.py} +177 -131
sonusai/{tplot.py → deprecated/tplot.py} +124 -102
sonusai/doc/__init__.py +1 -1
sonusai/doc/doc.py +112 -177
sonusai/doc.py +10 -10
sonusai/genft.py +93 -77
sonusai/genmetrics.py +59 -46
sonusai/genmix.py +116 -104
sonusai/genmixdb.py +194 -153
sonusai/lsdb.py +56 -66
sonusai/main.py +23 -20
sonusai/metrics/__init__.py +2 -0
sonusai/metrics/calc_audio_stats.py +29 -24
sonusai/metrics/calc_class_weights.py +7 -7
sonusai/metrics/calc_optimal_thresholds.py +5 -7
sonusai/metrics/calc_pcm.py +3 -3
sonusai/metrics/calc_pesq.py +10 -7
sonusai/metrics/calc_phase_distance.py +3 -3
sonusai/metrics/calc_sa_sdr.py +10 -8
sonusai/metrics/calc_segsnr_f.py +15 -17
sonusai/metrics/calc_speech.py +105 -47
sonusai/metrics/calc_wer.py +35 -32
sonusai/metrics/calc_wsdr.py +10 -7
sonusai/metrics/class_summary.py +30 -27
sonusai/metrics/confusion_matrix_summary.py +25 -22
sonusai/metrics/one_hot.py +91 -57
sonusai/metrics/snr_summary.py +53 -46
sonusai/mixture/__init__.py +19 -14
sonusai/mixture/audio.py +4 -6
sonusai/mixture/augmentation.py +37 -43
sonusai/mixture/class_count.py +5 -14
sonusai/mixture/config.py +292 -225
sonusai/mixture/constants.py +41 -30
sonusai/mixture/data_io.py +155 -0
sonusai/mixture/datatypes.py +111 -108
sonusai/mixture/db_datatypes.py +54 -70
sonusai/mixture/eq_rule_is_valid.py +6 -9
sonusai/mixture/feature.py +50 -46
sonusai/mixture/generation.py +522 -389
sonusai/mixture/helpers.py +217 -272
sonusai/mixture/log_duration_and_sizes.py +16 -13
sonusai/mixture/mixdb.py +677 -473
sonusai/mixture/soundfile_audio.py +12 -17
sonusai/mixture/sox_audio.py +91 -112
sonusai/mixture/sox_augmentation.py +8 -9
sonusai/mixture/spectral_mask.py +4 -6
sonusai/mixture/target_class_balancing.py +41 -36
sonusai/mixture/targets.py +69 -67
sonusai/mixture/tokenized_shell_vars.py +23 -23
sonusai/mixture/torchaudio_audio.py +14 -15
sonusai/mixture/torchaudio_augmentation.py +23 -27
sonusai/mixture/truth.py +48 -26
sonusai/mixture/truth_functions/__init__.py +26 -0
sonusai/mixture/truth_functions/crm.py +56 -38
sonusai/mixture/truth_functions/datatypes.py +37 -0
sonusai/mixture/truth_functions/energy.py +85 -59
sonusai/mixture/truth_functions/file.py +30 -30
sonusai/mixture/truth_functions/phoneme.py +14 -7
sonusai/mixture/truth_functions/sed.py +71 -45
sonusai/mixture/truth_functions/target.py +69 -106
sonusai/mkwav.py +52 -85
sonusai/onnx_predict.py +46 -43
sonusai/queries/__init__.py +3 -1
sonusai/queries/queries.py +100 -59
sonusai/speech/__init__.py +2 -0
sonusai/speech/l2arctic.py +24 -23
sonusai/speech/librispeech.py +16 -17
sonusai/speech/mcgill.py +22 -21
sonusai/speech/textgrid.py +32 -25
sonusai/speech/timit.py +45 -42
sonusai/speech/vctk.py +14 -13
sonusai/speech/voxceleb.py +26 -20
sonusai/summarize_metric_spenh.py +11 -10
sonusai/utils/__init__.py +4 -3
sonusai/utils/asl_p56.py +1 -1
sonusai/utils/asr.py +37 -17
sonusai/utils/asr_functions/__init__.py +2 -0
sonusai/utils/asr_functions/aaware_whisper.py +18 -12
sonusai/utils/audio_devices.py +12 -12
sonusai/utils/braced_glob.py +6 -8
sonusai/utils/calculate_input_shape.py +1 -4
sonusai/utils/compress.py +2 -2
sonusai/utils/convert_string_to_number.py +1 -3
sonusai/utils/create_timestamp.py +1 -1
sonusai/utils/create_ts_name.py +2 -2
sonusai/utils/dataclass_from_dict.py +1 -1
sonusai/utils/docstring.py +6 -6
sonusai/utils/energy_f.py +9 -7
sonusai/utils/engineering_number.py +56 -54
sonusai/utils/get_label_names.py +8 -10
sonusai/utils/human_readable_size.py +2 -2
sonusai/utils/model_utils.py +3 -5
sonusai/utils/numeric_conversion.py +2 -4
sonusai/utils/onnx_utils.py +43 -32
sonusai/utils/parallel.py +40 -27
sonusai/utils/print_mixture_details.py +25 -22
sonusai/utils/ranges.py +12 -12
sonusai/utils/read_predict_data.py +11 -9
sonusai/utils/reshape.py +19 -26
sonusai/utils/seconds_to_hms.py +1 -1
sonusai/utils/stacked_complex.py +8 -16
sonusai/utils/stratified_shuffle_split.py +29 -27
sonusai/utils/write_audio.py +2 -2
sonusai/utils/yes_or_no.py +3 -3
sonusai/vars.py +14 -14
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
sonusai-0.19.5.dist-info/RECORD +125 -0
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
sonusai/mixture/truth_functions/data.py +0 -58
sonusai/utils/read_mixture_data.py +0 -14
sonusai-0.18.8.dist-info/RECORD +0 -125
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0

sonusai/mixture/generation.py CHANGED Viewed

@@ -1,24 +1,25 @@
-from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import AudiosT
-from sonusai.mixture.datatypes import Augmentation
-from sonusai.mixture.datatypes import AugmentationRules
-from sonusai.mixture.datatypes import AugmentedTargets
-from sonusai.mixture.datatypes import GenMixData
-from sonusai.mixture.datatypes import ImpulseResponseFiles
-from sonusai.mixture.datatypes import Mixture
-from sonusai.mixture.datatypes import Mixtures
-from sonusai.mixture.datatypes import NoiseFiles
-from sonusai.mixture.datatypes import SpectralMasks
-from sonusai.mixture.datatypes import TargetFiles
-from sonusai.mixture.datatypes import Targets
-from sonusai.mixture.datatypes import UniversalSNRGenerator
-from sonusai.mixture.mixdb import MixtureDatabase
+# ruff: noqa: S608
+from .datatypes import AudiosT
+from .datatypes import AudioT
+from .datatypes import Augmentation
+from .datatypes import AugmentationRules
+from .datatypes import AugmentedTargets
+from .datatypes import GenMixData
+from .datatypes import ImpulseResponseFiles
+from .datatypes import Mixture
+from .datatypes import Mixtures
+from .datatypes import NoiseFiles
+from .datatypes import SpectralMasks
+from .datatypes import TargetFiles
+from .datatypes import Targets
+from .datatypes import UniversalSNRGenerator
+from .mixdb import MixtureDatabase
 def config_file(location: str) -> str:
     from os.path import join
-    return join(location, 'config.yml')
+    return join(location, "config.yml")
 def initialize_db(location: str, test: bool = False) -> None:
@@ -27,9 +28,16 @@ def initialize_db(location: str, test: bool = False) -> None:
     con = db_connection(location=location, create=True, test=test)
     con.execute("""
-    CREATE TABLE truth_setting(
+    CREATE TABLE truth_config(
     id INTEGER PRIMARY KEY NOT NULL,
-    setting TEXT NOT NULL)
+    config TEXT NOT NULL)
+    """)
+    con.execute("""
+    CREATE TABLE truth_parameters(
+    id INTEGER PRIMARY KEY NOT NULL,
+    name TEXT NOT NULL,
+    parameters INTEGER NOT NULL)
     """)
     con.execute("""
@@ -37,6 +45,7 @@ def initialize_db(location: str, test: bool = False) -> None:
     id INTEGER PRIMARY KEY NOT NULL,
     name TEXT NOT NULL,
     samples INTEGER NOT NULL,
+    class_indices TEXT NOT NULL,
     level_type TEXT NOT NULL,
     speaker_id INTEGER,
     FOREIGN KEY(speaker_id) REFERENCES speaker (id))
@@ -65,8 +74,6 @@ def initialize_db(location: str, test: bool = False) -> None:
     noise_mix_mode TEXT NOT NULL,
     num_classes INTEGER NOT NULL,
     seed INTEGER NOT NULL,
-    truth_mutex BOOLEAN NOT NULL,
-    truth_reduction_function TEXT NOT NULL,
     mixid_width INTEGER NOT NULL,
     speaker_metadata_tiers TEXT NOT NULL,
     textgrid_metadata_tiers TEXT NOT NULL)
@@ -87,7 +94,8 @@ def initialize_db(location: str, test: bool = False) -> None:
     con.execute("""
     CREATE TABLE impulse_response_file (
     id INTEGER PRIMARY KEY NOT NULL,
-    file TEXT NOT NULL)
+    file TEXT NOT NULL,
+    tags TEXT NOT NULL)
     """)
     con.execute("""
@@ -101,11 +109,11 @@ def initialize_db(location: str, test: bool = False) -> None:
     """)
     con.execute("""
-    CREATE TABLE target_file_truth_setting (
+    CREATE TABLE target_file_truth_config (
     target_file_id INTEGER,
-    truth_setting_id INTEGER,
+    truth_config_id INTEGER,
     FOREIGN KEY(target_file_id) REFERENCES target_file (id),
-    FOREIGN KEY(truth_setting_id) REFERENCES truth_setting (id))
+    FOREIGN KEY(truth_config_id) REFERENCES truth_config (id))
     """)
     con.execute("""
@@ -148,59 +156,55 @@ def initialize_db(location: str, test: bool = False) -> None:
 def populate_top_table(location: str, config: dict, test: bool = False) -> None:
-    """Populate top table
-    """
+    """Populate top table"""
     import json
-    from sonusai import SonusAIError
+    from .constants import MIXDB_VERSION
     from .mixdb import db_connection
-    if config['truth_mode'] not in ['normal', 'mutex']:
-        raise SonusAIError(f'invalid truth_mode: {config["truth_mode"]}')
-    truth_mutex = config['truth_mode'] == 'mutex'
     con = db_connection(location=location, readonly=False, test=test)
-    con.execute("""
+    con.execute(
+        """
     INSERT INTO top (version, asr_configs, class_balancing, feature, noise_mix_mode, num_classes,
-    seed, truth_mutex, truth_reduction_function, mixid_width, speaker_metadata_tiers, textgrid_metadata_tiers)
-    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-    """, (
-        1,
-        json.dumps(config['asr_configs']),
-        config['class_balancing'],
-        config['feature'],
-        config['noise_mix_mode'],
-        config['num_classes'],
-        config['seed'],
-        truth_mutex,
-        config['truth_reduction_function'],
-        0,
-        '',
-        ''))
+    seed, mixid_width, speaker_metadata_tiers, textgrid_metadata_tiers)
+    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    """,
+        (
+            MIXDB_VERSION,
+            json.dumps(config["asr_configs"]),
+            config["class_balancing"],
+            config["feature"],
+            config["noise_mix_mode"],
+            config["num_classes"],
+            config["seed"],
+            0,
+            "",
+            "",
+        ),
+    )
     con.commit()
     con.close()
 def populate_class_label_table(location: str, config: dict, test: bool = False) -> None:
-    """Populate class_label table
-    """
+    """Populate class_label table"""
     from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("INSERT INTO class_label (label) VALUES (?)",
-                    [(item,) for item in config['class_labels']])
+    con.executemany(
+        "INSERT INTO class_label (label) VALUES (?)",
+        [(item,) for item in config["class_labels"]],
+    )
     con.commit()
     con.close()
 def populate_class_weights_threshold_table(location: str, config: dict, test: bool = False) -> None:
-    """Populate class_weights_threshold table
-    """
-    from sonusai import SonusAIError
+    """Populate class_weights_threshold table"""
     from .mixdb import db_connection
-    class_weights_threshold = config['class_weights_threshold']
-    num_classes = config['num_classes']
+    class_weights_threshold = config["class_weights_threshold"]
+    num_classes = config["num_classes"]
     if not isinstance(class_weights_threshold, list):
         class_weights_threshold = [class_weights_threshold]
@@ -209,43 +213,72 @@ def populate_class_weights_threshold_table(location: str, config: dict, test: bo
         class_weights_threshold = [class_weights_threshold[0]] * num_classes
     if len(class_weights_threshold) != num_classes:
-        raise SonusAIError(f'invalid class_weights_threshold length: {len(class_weights_threshold)}')
+        raise ValueError(f"invalid class_weights_threshold length: {len(class_weights_threshold)}")
     con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("INSERT INTO class_weights_threshold (threshold) VALUES (?)",
-                    [(item,) for item in class_weights_threshold])
+    con.executemany(
+        "INSERT INTO class_weights_threshold (threshold) VALUES (?)",
+        [(item,) for item in class_weights_threshold],
+    )
     con.commit()
     con.close()
 def populate_spectral_mask_table(location: str, config: dict, test: bool = False) -> None:
-    """Populate spectral_mask table
-    """
+    """Populate spectral_mask table"""
     from .config import get_spectral_masks
     from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("""
+    con.executemany(
+        """
     INSERT INTO spectral_mask (f_max_width, f_num, t_max_width, t_num, t_max_percent) VALUES (?, ?, ?, ?, ?)
-    """, [(item.f_max_width,
-           item.f_num,
-           item.t_max_width,
-           item.t_num,
-           item.t_max_percent) for item in get_spectral_masks(config)]
-                    )
+    """,
+        [
+            (
+                item.f_max_width,
+                item.f_num,
+                item.t_max_width,
+                item.t_num,
+                item.t_max_percent,
+            )
+            for item in get_spectral_masks(config)
+        ],
+    )
+    con.commit()
+    con.close()
+def populate_truth_parameters_table(location: str, config: dict, test: bool = False) -> None:
+    """Populate truth_parameters table"""
+    from .config import get_truth_parameters
+    from .mixdb import db_connection
+    con = db_connection(location=location, readonly=False, test=test)
+    con.executemany(
+        """
+    INSERT INTO truth_parameters (name, parameters) VALUES (?, ?)
+    """,
+        [
+            (
+                item.name,
+                item.parameters,
+            )
+            for item in get_truth_parameters(config)
+        ],
+    )
     con.commit()
     con.close()
 def populate_target_file_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
-    """Populate target file table
-    """
+    """Populate target file table"""
     import json
     from pathlib import Path
     from .mixdb import db_connection
-    _populate_truth_setting_table(location, target_files, test)
+    _populate_truth_config_table(location, target_files, test)
     _populate_speaker_table(location, target_files, test)
     con = db_connection(location=location, readonly=False, test=test)
@@ -259,76 +292,106 @@ def populate_target_file_table(location: str, target_files: TargetFiles, test: b
             textgrid_metadata_tiers.add(tier)
         # Get truth settings for target file
-        truth_setting_ids: list[int] = []
-        for truth_setting in target_file.truth_settings:
-            cur.execute("SELECT truth_setting.id FROM truth_setting WHERE ? = truth_setting.setting",
-                        (truth_setting.to_json(),))
-            truth_setting_ids.append(cur.fetchone()[0])
+        truth_config_ids: list[int] = []
+        for name, config in target_file.truth_configs.items():
+            ts = json.dumps({"name": name} | config.to_dict())
+            cur.execute(
+                "SELECT truth_config.id FROM truth_config WHERE ? = truth_config.config",
+                (ts,),
+            )
+            truth_config_ids.append(cur.fetchone()[0])
         # Get speaker_id for target file
-        cur.execute("SELECT speaker.id FROM speaker WHERE ? = speaker.parent",
-                    (Path(target_file.name).parent.as_posix(),))
+        cur.execute(
+            "SELECT speaker.id FROM speaker WHERE ? = speaker.parent",
+            (Path(target_file.name).parent.as_posix(),),
+        )
         result = cur.fetchone()
         speaker_id = None
         if result is not None:
             speaker_id = result[0]
         # Add entry
-        cur.execute("INSERT INTO target_file (name, samples, level_type, speaker_id) VALUES (?, ?, ?, ?)",
-                    (target_file.name, target_file.samples, target_file.level_type, speaker_id))
+        cur.execute(
+            "INSERT INTO target_file (name, samples, class_indices, level_type, speaker_id) VALUES (?, ?, ?, ?, ?)",
+            (
+                target_file.name,
+                target_file.samples,
+                json.dumps(target_file.class_indices),
+                target_file.level_type,
+                speaker_id,
+            ),
+        )
         target_file_id = cur.lastrowid
-        for truth_setting_id in truth_setting_ids:
-            cur.execute("INSERT INTO target_file_truth_setting (target_file_id, truth_setting_id) VALUES (?, ?)",
-                        (target_file_id, truth_setting_id))
+        for truth_config_id in truth_config_ids:
+            cur.execute(
+                "INSERT INTO target_file_truth_config (target_file_id, truth_config_id) VALUES (?, ?)",
+                (target_file_id, truth_config_id),
+            )
     # Update textgrid_metadata_tiers in the top table
-    con.execute("UPDATE top SET textgrid_metadata_tiers=? WHERE top.id = ?",
-                (json.dumps(sorted(textgrid_metadata_tiers)), 1))
+    con.execute(
+        "UPDATE top SET textgrid_metadata_tiers=? WHERE top.id = ?",
+        (json.dumps(sorted(textgrid_metadata_tiers)), 1),
+    )
     con.commit()
     con.close()
 def populate_noise_file_table(location: str, noise_files: NoiseFiles, test: bool = False) -> None:
-    """Populate noise file table
-    """
+    """Populate noise file table"""
     from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("INSERT INTO noise_file (name, samples) VALUES (?, ?)",
-                    [(noise_file.name, noise_file.samples) for noise_file in noise_files])
+    con.executemany(
+        "INSERT INTO noise_file (name, samples) VALUES (?, ?)",
+        [(noise_file.name, noise_file.samples) for noise_file in noise_files],
+    )
     con.commit()
     con.close()
-def populate_impulse_response_file_table(location: str, impulse_response_files: ImpulseResponseFiles,
-                                         test: bool = False) -> None:
-    """Populate impulse response file table
-    """
+def populate_impulse_response_file_table(
+    location: str, impulse_response_files: ImpulseResponseFiles, test: bool = False
+) -> None:
+    """Populate impulse response file table"""
+    import json
     from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("INSERT INTO impulse_response_file (file) VALUES (?)",
-                    [(impulse_response_file,) for impulse_response_file in impulse_response_files])
+    con.executemany(
+        "INSERT INTO impulse_response_file (file, tags) VALUES (?, ?)",
+        [
+            (
+                impulse_response_file.file,
+                json.dumps(impulse_response_file.tags),
+            )
+            for impulse_response_file in impulse_response_files
+        ],
+    )
     con.commit()
     con.close()
 def update_mixid_width(location: str, num_mixtures: int, test: bool = False) -> None:
-    """Update the mixid width
-    """
-    from .mixdb import db_connection
+    """Update the mixid width"""
     from sonusai.utils import max_text_width
+    from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    con.execute("UPDATE top SET mixid_width=? WHERE top.id = ?", (max_text_width(num_mixtures), 1))
+    con.execute(
+        "UPDATE top SET mixid_width=? WHERE top.id = ?",
+        (max_text_width(num_mixtures), 1),
+    )
     con.commit()
     con.close()
 def populate_mixture_table(location: str, mixtures: Mixtures, test: bool = False) -> None:
-    """Populate mixture table
-    """
+    """Populate mixture table"""
     from .helpers import from_mixture
     from .helpers import from_target
     from .mixdb import db_connection
@@ -348,29 +411,35 @@ def populate_mixture_table(location: str, mixtures: Mixtures, test: bool = False
     # Populate mixture table
     cur = con.cursor()
     for mixture in mixtures:
-        cur.execute("""
+        cur.execute(
+            """
         INSERT INTO mixture (name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
         snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-        """, from_mixture(mixture))
+        """,
+            from_mixture(mixture),
+        )
         mixture_id = cur.lastrowid
         for target in mixture.targets:
-            target_id = con.execute("""
+            target_id = con.execute(
+                """
             SELECT target.id
             FROM target
             WHERE ? = target.file_id AND ? = target.augmentation AND ? = target.gain
-            """, from_target(target)).fetchone()[0]
-            con.execute("INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
-                        (mixture_id, target_id))
+            """,
+                from_target(target),
+            ).fetchone()[0]
+            con.execute(
+                "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
+                (mixture_id, target_id),
+            )
     con.commit()
     con.close()
-def update_mixture(mixdb: MixtureDatabase,
-                   mixture: Mixture,
-                   with_data: bool = False) -> tuple[Mixture, GenMixData]:
+def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = False) -> tuple[Mixture, GenMixData]:
     """Update mixture record with name and gains
     :param mixdb: Mixture database
@@ -391,12 +460,11 @@ def update_mixture(mixdb: MixtureDatabase,
     # Apply IR and sum targets audio before initializing the mixture SNR gains
     target_audio = get_target(mixdb, mixture, targets_audio)
-    mixture = _initialize_mixture_gains(mixdb=mixdb,
-                                        mixture=mixture,
-                                        target_audio=target_audio,
-                                        noise_audio=noise_audio)
+    mixture = _initialize_mixture_gains(
+        mixdb=mixdb, mixture=mixture, target_audio=target_audio, noise_audio=noise_audio
+    )
-    mixture.name = f'{int(mixture.name):0{mixdb.mixid_width}}.h5'
+    mixture.name = f"{int(mixture.name):0{mixdb.mixid_width}}"
     if not with_data:
         return mixture, GenMixData()
@@ -409,10 +477,12 @@ def update_mixture(mixdb: MixtureDatabase,
     target_audio = get_target(mixdb, mixture, targets_audio)
     mixture_audio = target_audio + noise_audio
-    return mixture, GenMixData(mixture=mixture_audio,
-                               targets=targets_audio,
-                               target=target_audio,
-                               noise=noise_audio)
+    return mixture, GenMixData(
+        mixture=mixture_audio,
+        targets=targets_audio,
+        target=target_audio,
+        noise=noise_audio,
+    )
 def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
@@ -439,9 +509,13 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
     targets_audio = []
     for target in mixture.targets:
         target_audio = mixdb.read_target_audio(target.file_id)
-        targets_audio.append(apply_augmentation(audio=target_audio,
-                                                augmentation=target.augmentation,
-                                                frame_length=mixdb.feature_step_samples))
+        targets_audio.append(
+            apply_augmentation(
+                audio=target_audio,
+                augmentation=target.augmentation,
+                frame_length=mixdb.feature_step_samples,
+            )
+        )
         # target_gain is used to back out the gain augmentation in order to return the target audio
         # to its normalized level when calculating truth (if needed).
@@ -458,13 +532,11 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
     return mixture, targets_audio
-def _initialize_mixture_gains(mixdb: MixtureDatabase,
-                              mixture: Mixture,
-                              target_audio: AudioT,
-                              noise_audio: AudioT) -> Mixture:
+def _initialize_mixture_gains(
+    mixdb: MixtureDatabase, mixture: Mixture, target_audio: AudioT, noise_audio: AudioT
+) -> Mixture:
     import numpy as np
-    from sonusai import SonusAIError
     from sonusai.utils import asl_p56
     from sonusai.utils import db_to_linear
@@ -480,19 +552,20 @@ def _initialize_mixture_gains(mixdb: MixtureDatabase,
         mixture.target_snr_gain = 1
         mixture.noise_snr_gain = 0
     else:
-        target_level_types = [target_file.level_type for target_file in
-                              [mixdb.target_file(target.file_id) for target in mixture.targets]]
+        target_level_types = [
+            target_file.level_type for target_file in [mixdb.target_file(target.file_id) for target in mixture.targets]
+        ]
         if not all(level_type == target_level_types[0] for level_type in target_level_types):
-            raise SonusAIError(f'Not all target_level_types in mixup are the same')
+            raise ValueError("Not all target_level_types in mixup are the same")
         level_type = target_level_types[0]
         match level_type:
-            case 'default':
+            case "default":
                 target_energy = np.mean(np.square(target_audio))
-            case 'speech':
+            case "speech":
                 target_energy = asl_p56(target_audio)
             case _:
-                raise SonusAIError(f'Unknown level_type: {level_type}')
+                raise ValueError(f"Unknown level_type: {level_type}")
         noise_energy = np.mean(np.square(noise_audio))
         if noise_energy == 0:
@@ -525,19 +598,20 @@ def _initialize_mixture_gains(mixdb: MixtureDatabase,
     return mixture
-def generate_mixtures(noise_mix_mode: str,
-                      augmented_targets: AugmentedTargets,
-                      target_files: TargetFiles,
-                      target_augmentations: AugmentationRules,
-                      noise_files: NoiseFiles,
-                      noise_augmentations: AugmentationRules,
-                      spectral_masks: SpectralMasks,
-                      all_snrs: list[UniversalSNRGenerator],
-                      mixups: list[int],
-                      num_classes: int,
-                      truth_mutex: bool,
-                      feature_step_samples: int,
-                      num_ir: int) -> tuple[int, int, Mixtures]:
+def generate_mixtures(
+    noise_mix_mode: str,
+    augmented_targets: AugmentedTargets,
+    target_files: TargetFiles,
+    target_augmentations: AugmentationRules,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    spectral_masks: SpectralMasks,
+    all_snrs: list[UniversalSNRGenerator],
+    mixups: list[int],
+    num_classes: int,
+    feature_step_samples: int,
+    num_ir: int,
+) -> tuple[int, int, Mixtures]:
     """Generate mixtures
     :param noise_mix_mode: Noise mix mode
@@ -550,72 +624,72 @@ def generate_mixtures(noise_mix_mode: str,
     :param all_snrs: List of all SNRs
     :param mixups: List of mixup values
     :param num_classes: Number of classes
-    :param truth_mutex: Truth mutex mode
     :param feature_step_samples: Number of samples in a feature step
     :param num_ir: Number of impulse response files
     :return: (Number of noise files used, number of noise samples used, list of mixture records)
     """
-    from sonusai import SonusAIError
-    if noise_mix_mode == 'exhaustive':
-        return _exhaustive_noise_mix(augmented_targets=augmented_targets,
-                                     target_files=target_files,
-                                     target_augmentations=target_augmentations,
-                                     noise_files=noise_files,
-                                     noise_augmentations=noise_augmentations,
-                                     spectral_masks=spectral_masks,
-                                     all_snrs=all_snrs,
-                                     mixups=mixups,
-                                     num_classes=num_classes,
-                                     truth_mutex=truth_mutex,
-                                     feature_step_samples=feature_step_samples,
-                                     num_ir=num_ir)
-    if noise_mix_mode == 'non-exhaustive':
-        return _non_exhaustive_noise_mix(augmented_targets=augmented_targets,
-                                         target_files=target_files,
-                                         target_augmentations=target_augmentations,
-                                         noise_files=noise_files,
-                                         noise_augmentations=noise_augmentations,
-                                         spectral_masks=spectral_masks,
-                                         all_snrs=all_snrs,
-                                         mixups=mixups,
-                                         num_classes=num_classes,
-                                         truth_mutex=truth_mutex,
-                                         feature_step_samples=feature_step_samples,
-                                         num_ir=num_ir)
-    if noise_mix_mode == 'non-combinatorial':
-        return _non_combinatorial_noise_mix(augmented_targets=augmented_targets,
-                                            target_files=target_files,
-                                            target_augmentations=target_augmentations,
-                                            noise_files=noise_files,
-                                            noise_augmentations=noise_augmentations,
-                                            spectral_masks=spectral_masks,
-                                            all_snrs=all_snrs,
-                                            mixups=mixups,
-                                            num_classes=num_classes,
-                                            truth_mutex=truth_mutex,
-                                            feature_step_samples=feature_step_samples,
-                                            num_ir=num_ir)
-    raise SonusAIError(f'invalid noise_mix_mode: {noise_mix_mode}')
-def _exhaustive_noise_mix(augmented_targets: AugmentedTargets,
-                          target_files: TargetFiles,
-                          target_augmentations: AugmentationRules,
-                          noise_files: NoiseFiles,
-                          noise_augmentations: AugmentationRules,
-                          spectral_masks: SpectralMasks,
-                          all_snrs: list[UniversalSNRGenerator],
-                          mixups: list[int],
-                          num_classes: int,
-                          truth_mutex: bool,
-                          feature_step_samples: int,
-                          num_ir: int) -> tuple[int, int, Mixtures]:
-    """ Use every noise/augmentation with every target/augmentation
-    """
+    if noise_mix_mode == "exhaustive":
+        return _exhaustive_noise_mix(
+            augmented_targets=augmented_targets,
+            target_files=target_files,
+            target_augmentations=target_augmentations,
+            noise_files=noise_files,
+            noise_augmentations=noise_augmentations,
+            spectral_masks=spectral_masks,
+            all_snrs=all_snrs,
+            mixups=mixups,
+            num_classes=num_classes,
+            feature_step_samples=feature_step_samples,
+            num_ir=num_ir,
+        )
+    if noise_mix_mode == "non-exhaustive":
+        return _non_exhaustive_noise_mix(
+            augmented_targets=augmented_targets,
+            target_files=target_files,
+            target_augmentations=target_augmentations,
+            noise_files=noise_files,
+            noise_augmentations=noise_augmentations,
+            spectral_masks=spectral_masks,
+            all_snrs=all_snrs,
+            mixups=mixups,
+            num_classes=num_classes,
+            feature_step_samples=feature_step_samples,
+            num_ir=num_ir,
+        )
+    if noise_mix_mode == "non-combinatorial":
+        return _non_combinatorial_noise_mix(
+            augmented_targets=augmented_targets,
+            target_files=target_files,
+            target_augmentations=target_augmentations,
+            noise_files=noise_files,
+            noise_augmentations=noise_augmentations,
+            spectral_masks=spectral_masks,
+            all_snrs=all_snrs,
+            mixups=mixups,
+            num_classes=num_classes,
+            feature_step_samples=feature_step_samples,
+            num_ir=num_ir,
+        )
+    raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
+def _exhaustive_noise_mix(
+    augmented_targets: AugmentedTargets,
+    target_files: TargetFiles,
+    target_augmentations: AugmentationRules,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    spectral_masks: SpectralMasks,
+    all_snrs: list[UniversalSNRGenerator],
+    mixups: list[int],
+    num_classes: int,
+    feature_step_samples: int,
+    num_ir: int,
+) -> tuple[int, int, Mixtures]:
+    """Use every noise/augmentation with every target/augmentation"""
     from random import randint
     import numpy as np
@@ -633,42 +707,53 @@ def _exhaustive_noise_mix(augmented_targets: AugmentedTargets,
     used_noise_files = len(noise_files) * len(noise_augmentations)
     used_noise_samples = 0
-    augmented_target_ids_for_mixups = [get_augmented_target_ids_for_mixup(augmented_targets=augmented_targets,
-                                                                          targets=target_files,
-                                                                          target_augmentations=target_augmentations,
-                                                                          mixup=mixup,
-                                                                          num_classes=num_classes,
-                                                                          truth_mutex=truth_mutex) for mixup in mixups]
+    augmented_target_ids_for_mixups = [
+        get_augmented_target_ids_for_mixup(
+            augmented_targets=augmented_targets,
+            targets=target_files,
+            target_augmentations=target_augmentations,
+            mixup=mixup,
+            num_classes=num_classes,
+        )
+        for mixup in mixups
+    ]
     for noise_file_id in range(len(noise_files)):
         for noise_augmentation_rule in noise_augmentations:
             noise_augmentation = augmentation_from_rule(noise_augmentation_rule, num_ir)
             noise_offset = 0
             noise_length = estimate_augmented_length_from_length(
                 length=noise_files[noise_file_id].samples,
-                tempo=noise_augmentation.tempo)
+                tempo=noise_augmentation.tempo,
+            )
             for augmented_target_ids_for_mixup in augmented_target_ids_for_mixups:
                 for augmented_target_ids in augmented_target_ids_for_mixup:
-                    targets, target_length = _get_target_info(augmented_target_ids=augmented_target_ids,
-                                                              augmented_targets=augmented_targets,
-                                                              target_files=target_files,
-                                                              target_augmentations=target_augmentations,
-                                                              feature_step_samples=feature_step_samples,
-                                                              num_ir=num_ir)
+                    targets, target_length = _get_target_info(
+                        augmented_target_ids=augmented_target_ids,
+                        augmented_targets=augmented_targets,
+                        target_files=target_files,
+                        target_augmentations=target_augmentations,
+                        feature_step_samples=feature_step_samples,
+                        num_ir=num_ir,
+                    )
                     for spectral_mask_id in range(len(spectral_masks)):
                         for snr in all_snrs:
-                            mixtures.append(Mixture(
-                                targets=targets,
-                                name=str(m_id),
-                                noise=Noise(file_id=noise_file_id + 1,
-                                            augmentation=noise_augmentation,
-                                            offset=noise_offset),
-                                samples=target_length,
-                                snr=UniversalSNR(value=snr.value,
-                                                 is_random=snr.is_random),
-                                spectral_mask_id=spectral_mask_id + 1,
-                                spectral_mask_seed=randint(0, np.iinfo('i').max)))
+                            mixtures.append(
+                                Mixture(
+                                    targets=targets,
+                                    name=str(m_id),
+                                    noise=Noise(
+                                        file_id=noise_file_id + 1,
+                                        augmentation=noise_augmentation,
+                                        offset=noise_offset,
+                                    ),
+                                    samples=target_length,
+                                    snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
+                                    spectral_mask_id=spectral_mask_id + 1,
+                                    spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
+                                )
+                            )
                             m_id += 1
                             noise_offset = int((noise_offset + target_length) % noise_length)
@@ -677,19 +762,20 @@ def _exhaustive_noise_mix(augmented_targets: AugmentedTargets,
     return used_noise_files, used_noise_samples, mixtures
-def _non_exhaustive_noise_mix(augmented_targets: AugmentedTargets,
-                              target_files: TargetFiles,
-                              target_augmentations: AugmentationRules,
-                              noise_files: NoiseFiles,
-                              noise_augmentations: AugmentationRules,
-                              spectral_masks: SpectralMasks,
-                              all_snrs: list[UniversalSNRGenerator],
-                              mixups: list[int],
-                              num_classes: int,
-                              truth_mutex: bool,
-                              feature_step_samples: int,
-                              num_ir: int) -> tuple[int, int, Mixtures]:
-    """ Cycle through every target/augmentation without necessarily using all noise/augmentation combinations
+def _non_exhaustive_noise_mix(
+    augmented_targets: AugmentedTargets,
+    target_files: TargetFiles,
+    target_augmentations: AugmentationRules,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    spectral_masks: SpectralMasks,
+    all_snrs: list[UniversalSNRGenerator],
+    mixups: list[int],
+    num_classes: int,
+    feature_step_samples: int,
+    num_ir: int,
+) -> tuple[int, int, Mixtures]:
+    """Cycle through every target/augmentation without necessarily using all noise/augmentation combinations
     (reduced data set).
     """
     from random import randint
@@ -710,67 +796,81 @@ def _non_exhaustive_noise_mix(augmented_targets: AugmentedTargets,
     noise_augmentation_id = None
     noise_offset = None
-    augmented_target_indices_for_mixups = [get_augmented_target_ids_for_mixup(
-        augmented_targets=augmented_targets,
-        targets=target_files,
-        target_augmentations=target_augmentations,
-        mixup=mixup,
-        num_classes=num_classes,
-        truth_mutex=truth_mutex) for mixup in mixups]
+    augmented_target_indices_for_mixups = [
+        get_augmented_target_ids_for_mixup(
+            augmented_targets=augmented_targets,
+            targets=target_files,
+            target_augmentations=target_augmentations,
+            mixup=mixup,
+            num_classes=num_classes,
+        )
+        for mixup in mixups
+    ]
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
-            targets, target_length = _get_target_info(augmented_target_ids=augmented_target_indices,
-                                                      augmented_targets=augmented_targets,
-                                                      target_files=target_files,
-                                                      target_augmentations=target_augmentations,
-                                                      feature_step_samples=feature_step_samples,
-                                                      num_ir=num_ir)
+            targets, target_length = _get_target_info(
+                augmented_target_ids=augmented_target_indices,
+                augmented_targets=augmented_targets,
+                target_files=target_files,
+                target_augmentations=target_augmentations,
+                feature_step_samples=feature_step_samples,
+                num_ir=num_ir,
+            )
             for spectral_mask_id in range(len(spectral_masks)):
                 for snr in all_snrs:
-                    (noise_file_id,
-                     noise_augmentation_id,
-                     noise_augmentation,
-                     noise_offset) = _get_next_noise_offset(noise_file_id=noise_file_id,
-                                                            noise_augmentation_id=noise_augmentation_id,
-                                                            noise_offset=noise_offset,
-                                                            target_length=target_length,
-                                                            noise_files=noise_files,
-                                                            noise_augmentations=noise_augmentations,
-                                                            num_ir=num_ir)
+                    (
+                        noise_file_id,
+                        noise_augmentation_id,
+                        noise_augmentation,
+                        noise_offset,
+                    ) = _get_next_noise_offset(
+                        noise_file_id=noise_file_id,
+                        noise_augmentation_id=noise_augmentation_id,
+                        noise_offset=noise_offset,
+                        target_length=target_length,
+                        noise_files=noise_files,
+                        noise_augmentations=noise_augmentations,
+                        num_ir=num_ir,
+                    )
                     used_noise_samples += target_length
-                    used_noise_files.add(f'{noise_file_id}_{noise_augmentation_id}')
-                    mixtures.append(Mixture(
-                        targets=targets,
-                        name=str(m_id),
-                        noise=Noise(file_id=noise_file_id + 1,
-                                    augmentation=noise_augmentation,
-                                    offset=noise_offset),
-                        samples=target_length,
-                        snr=UniversalSNR(value=snr.value,
-                                         is_random=snr.is_random),
-                        spectral_mask_id=spectral_mask_id + 1,
-                        spectral_mask_seed=randint(0, np.iinfo('i').max)))
+                    used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
+                    mixtures.append(
+                        Mixture(
+                            targets=targets,
+                            name=str(m_id),
+                            noise=Noise(
+                                file_id=noise_file_id + 1,
+                                augmentation=noise_augmentation,
+                                offset=noise_offset,
+                            ),
+                            samples=target_length,
+                            snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
+                            spectral_mask_id=spectral_mask_id + 1,
+                            spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
+                        )
+                    )
                     m_id += 1
     return len(used_noise_files), used_noise_samples, mixtures
-def _non_combinatorial_noise_mix(augmented_targets: AugmentedTargets,
-                                 target_files: TargetFiles,
-                                 target_augmentations: AugmentationRules,
-                                 noise_files: NoiseFiles,
-                                 noise_augmentations: AugmentationRules,
-                                 spectral_masks: SpectralMasks,
-                                 all_snrs: list[UniversalSNRGenerator],
-                                 mixups: list[int],
-                                 num_classes: int,
-                                 truth_mutex: bool,
-                                 feature_step_samples: int,
-                                 num_ir: int) -> tuple[int, int, Mixtures]:
-    """ Combine a target/augmentation with a single cut of a noise/augmentation non-exhaustively
+def _non_combinatorial_noise_mix(
+    augmented_targets: AugmentedTargets,
+    target_files: TargetFiles,
+    target_augmentations: AugmentationRules,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    spectral_masks: SpectralMasks,
+    all_snrs: list[UniversalSNRGenerator],
+    mixups: list[int],
+    num_classes: int,
+    feature_step_samples: int,
+    num_ir: int,
+) -> tuple[int, int, Mixtures]:
+    """Combine a target/augmentation with a single cut of a noise/augmentation non-exhaustively
     (each target/augmentation does not use each noise/augmentation). Cut has random start and loop back to
     beginning if end of noise/augmentation is reached.
     """
@@ -792,57 +892,72 @@ def _non_combinatorial_noise_mix(augmented_targets: AugmentedTargets,
     noise_file_id = None
     noise_augmentation_id = None
-    augmented_target_indices_for_mixups = [get_augmented_target_ids_for_mixup(
-        augmented_targets=augmented_targets,
-        targets=target_files,
-        target_augmentations=target_augmentations,
-        mixup=mixup,
-        num_classes=num_classes,
-        truth_mutex=truth_mutex) for mixup in mixups]
+    augmented_target_indices_for_mixups = [
+        get_augmented_target_ids_for_mixup(
+            augmented_targets=augmented_targets,
+            targets=target_files,
+            target_augmentations=target_augmentations,
+            mixup=mixup,
+            num_classes=num_classes,
+        )
+        for mixup in mixups
+    ]
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
-            targets, target_length = _get_target_info(augmented_target_ids=augmented_target_indices,
-                                                      augmented_targets=augmented_targets,
-                                                      target_files=target_files,
-                                                      target_augmentations=target_augmentations,
-                                                      feature_step_samples=feature_step_samples,
-                                                      num_ir=num_ir)
+            targets, target_length = _get_target_info(
+                augmented_target_ids=augmented_target_indices,
+                augmented_targets=augmented_targets,
+                target_files=target_files,
+                target_augmentations=target_augmentations,
+                feature_step_samples=feature_step_samples,
+                num_ir=num_ir,
+            )
             for spectral_mask_id in range(len(spectral_masks)):
                 for snr in all_snrs:
-                    (noise_file_id,
-                     noise_augmentation_id,
-                     noise_augmentation,
-                     noise_length) = _get_next_noise_indices(noise_file_id=noise_file_id,
-                                                             noise_augmentation_id=noise_augmentation_id,
-                                                             noise_files=noise_files,
-                                                             noise_augmentations=noise_augmentations,
-                                                             num_ir=num_ir)
+                    (
+                        noise_file_id,
+                        noise_augmentation_id,
+                        noise_augmentation,
+                        noise_length,
+                    ) = _get_next_noise_indices(
+                        noise_file_id=noise_file_id,
+                        noise_augmentation_id=noise_augmentation_id,
+                        noise_files=noise_files,
+                        noise_augmentations=noise_augmentations,
+                        num_ir=num_ir,
+                    )
                     used_noise_samples += target_length
-                    used_noise_files.add(f'{noise_file_id}_{noise_augmentation_id}')
-                    mixtures.append(Mixture(
-                        targets=targets,
-                        name=str(m_id),
-                        noise=Noise(file_id=noise_file_id + 1,
-                                    augmentation=noise_augmentation,
-                                    offset=choice(range(noise_length))),
-                        samples=target_length,
-                        snr=UniversalSNR(value=snr.value,
-                                         is_random=snr.is_random),
-                        spectral_mask_id=spectral_mask_id + 1,
-                        spectral_mask_seed=randint(0, np.iinfo('i').max)))
+                    used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
+                    mixtures.append(
+                        Mixture(
+                            targets=targets,
+                            name=str(m_id),
+                            noise=Noise(
+                                file_id=noise_file_id + 1,
+                                augmentation=noise_augmentation,
+                                offset=choice(range(noise_length)),  # noqa: S311
+                            ),
+                            samples=target_length,
+                            snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
+                            spectral_mask_id=spectral_mask_id + 1,
+                            spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
+                        )
+                    )
                     m_id += 1
     return len(used_noise_files), used_noise_samples, mixtures
-def _get_next_noise_indices(noise_file_id: int,
-                            noise_augmentation_id: int,
-                            noise_files: NoiseFiles,
-                            noise_augmentations: AugmentationRules,
-                            num_ir: int) -> tuple[int, int, Augmentation, int]:
+def _get_next_noise_indices(
+    noise_file_id: int | None,
+    noise_augmentation_id: int | None,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    num_ir: int,
+) -> tuple[int, int, Augmentation, int]:
     from .augmentation import augmentation_from_rule
     from .augmentation import estimate_augmented_length_from_length
@@ -858,19 +973,21 @@ def _get_next_noise_indices(noise_file_id: int,
                 noise_file_id = 0
     noise_augmentation = augmentation_from_rule(noise_augmentations[noise_augmentation_id], num_ir)
-    noise_length = estimate_augmented_length_from_length(length=noise_files[noise_file_id].samples,
-                                                         tempo=noise_augmentation.tempo)
+    noise_length = estimate_augmented_length_from_length(
+        length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
+    )
     return noise_file_id, noise_augmentation_id, noise_augmentation, noise_length
-def _get_next_noise_offset(noise_file_id: int | None,
-                           noise_augmentation_id: int | None,
-                           noise_offset: int | None,
-                           target_length: int,
-                           noise_files: NoiseFiles,
-                           noise_augmentations: AugmentationRules,
-                           num_ir: int) -> tuple[int, int, Augmentation, int]:
-    from sonusai import SonusAIError
+def _get_next_noise_offset(
+    noise_file_id: int | None,
+    noise_augmentation_id: int | None,
+    noise_offset: int | None,
+    target_length: int,
+    noise_files: NoiseFiles,
+    noise_augmentations: AugmentationRules,
+    num_ir: int,
+) -> tuple[int, int, Augmentation, int]:
     from .augmentation import augmentation_from_rule
     from .augmentation import estimate_augmented_length_from_length
@@ -880,11 +997,12 @@ def _get_next_noise_offset(noise_file_id: int | None,
         noise_offset = 0
     noise_augmentation = augmentation_from_rule(noise_augmentations[noise_file_id], num_ir)
-    noise_length = estimate_augmented_length_from_length(length=noise_files[noise_file_id].samples,
-                                                         tempo=noise_augmentation.tempo)
+    noise_length = estimate_augmented_length_from_length(
+        length=noise_files[noise_file_id].samples, tempo=noise_augmentation.tempo
+    )
     if noise_offset + target_length >= noise_length:
         if noise_offset == 0:
-            raise SonusAIError('Length of target audio exceeds length of noise audio')
+            raise ValueError("Length of target audio exceeds length of noise audio")
         noise_offset = 0
         noise_augmentation_id += 1
@@ -898,12 +1016,14 @@ def _get_next_noise_offset(noise_file_id: int | None,
     return noise_file_id, noise_augmentation_id, noise_augmentation, noise_offset
-def _get_target_info(augmented_target_ids: list[int],
-                     augmented_targets: AugmentedTargets,
-                     target_files: TargetFiles,
-                     target_augmentations: AugmentationRules,
-                     feature_step_samples: int,
-                     num_ir: int) -> tuple[Targets, int]:
+def _get_target_info(
+    augmented_target_ids: list[int],
+    augmented_targets: AugmentedTargets,
+    target_files: TargetFiles,
+    target_augmentations: AugmentationRules,
+    feature_step_samples: int,
+    num_ir: int,
+) -> tuple[Targets, int]:
     from .augmentation import augmentation_from_rule
     from .augmentation import estimate_augmented_length_from_length
     from .datatypes import Target
@@ -918,18 +1038,23 @@ def _get_target_info(augmented_target_ids: list[int],
         mixups.append(Target(file_id=tfi + 1, augmentation=target_augmentation))
-        target_length = max(estimate_augmented_length_from_length(length=target_files[tfi].samples,
-                                                                  tempo=target_augmentation.tempo,
-                                                                  frame_length=feature_step_samples),
-                            target_length)
+        target_length = max(
+            estimate_augmented_length_from_length(
+                length=target_files[tfi].samples,
+                tempo=target_augmentation.tempo,
+                frame_length=feature_step_samples,
+            ),
+            target_length,
+        )
     return mixups, target_length
 def get_all_snrs_from_config(config: dict) -> list[UniversalSNRGenerator]:
     from .datatypes import UniversalSNRGenerator
-    return ([UniversalSNRGenerator(is_random=False, _raw_value=snr) for snr in config['snrs']] +
-            [UniversalSNRGenerator(is_random=True, _raw_value=snr) for snr in config['random_snrs']])
+    return [UniversalSNRGenerator(is_random=False, _raw_value=snr) for snr in config["snrs"]] + [
+        UniversalSNRGenerator(is_random=True, _raw_value=snr) for snr in config["random_snrs"]
+    ]
 def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
@@ -939,7 +1064,7 @@ def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
     from sonusai.mixture import tokenized_expand
-    textgrid_file = Path(tokenized_expand(target_file)[0]).with_suffix('.TextGrid')
+    textgrid_file = Path(tokenized_expand(target_file)[0]).with_suffix(".TextGrid")
     if not textgrid_file.exists():
         return []
@@ -949,8 +1074,7 @@ def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
 def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
-    """Populate speaker table
-    """
+    """Populate speaker table"""
     import json
     from pathlib import Path
@@ -960,65 +1084,74 @@ def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool
     from .tokenized_shell_vars import tokenized_expand
     # Determine columns for speaker table
-    all_parents = set([Path(target_file.name).parent for target_file in target_files])
-    speaker_parents = (parent for parent in all_parents if Path(tokenized_expand(parent / 'speaker.yml')[0]).exists())
+    all_parents = {Path(target_file.name).parent for target_file in target_files}
+    speaker_parents = (parent for parent in all_parents if Path(tokenized_expand(parent / "speaker.yml")[0]).exists())
     speakers: dict[Path, dict[str, str]] = {}
     for parent in sorted(speaker_parents):
-        with open(tokenized_expand(parent / 'speaker.yml')[0], 'r') as f:
+        with open(tokenized_expand(parent / "speaker.yml")[0]) as f:
             speakers[parent] = yaml.safe_load(f)
     new_columns: list[str] = []
-    for keys in speakers.keys():
-        for column in speakers[keys].keys():
+    for keys in speakers:
+        for column in speakers[keys]:
             new_columns.append(column)
     new_columns = sorted(set(new_columns))
     con = db_connection(location=location, readonly=False, test=test)
     for new_column in new_columns:
-        con.execute(f'ALTER TABLE speaker ADD COLUMN {new_column} TEXT')
+        con.execute(f"ALTER TABLE speaker ADD COLUMN {new_column} TEXT")
     # Populate speaker table
     speaker_rows: list[tuple[str, ...]] = []
-    for key in speakers.keys():
+    for key in speakers:
         entry = (speakers[key].get(column, None) for column in new_columns)
-        speaker_rows.append((key.as_posix(), *entry))
+        speaker_rows.append((key.as_posix(), *entry))  # type: ignore[arg-type]
-    column_ids = ', '.join(['parent', *new_columns])
-    column_values = ', '.join(['?'] * (len(new_columns) + 1))
-    con.executemany(f'INSERT INTO speaker ({column_ids}) VALUES ({column_values})', speaker_rows)
+    column_ids = ", ".join(["parent", *new_columns])
+    column_values = ", ".join(["?"] * (len(new_columns) + 1))
+    con.executemany(f"INSERT INTO speaker ({column_ids}) VALUES ({column_values})", speaker_rows)
     con.execute("CREATE INDEX speaker_parent_idx ON speaker (parent)")
     # Update speaker_metadata_tiers in the top table
-    tiers = [description[0] for description in con.execute("SELECT * FROM speaker").description if
-             description[0] not in ('id', 'parent')]
-    con.execute("UPDATE top SET speaker_metadata_tiers=? WHERE top.id = ?", (json.dumps(tiers), 1))
-    if 'speaker_id' in tiers:
+    tiers = [
+        description[0]
+        for description in con.execute("SELECT * FROM speaker").description
+        if description[0] not in ("id", "parent")
+    ]
+    con.execute(
+        "UPDATE top SET speaker_metadata_tiers=? WHERE top.id = ?",
+        (json.dumps(tiers), 1),
+    )
+    if "speaker_id" in tiers:
         con.execute("CREATE INDEX speaker_speaker_id_idx ON speaker (speaker_id)")
     con.commit()
     con.close()
-def _populate_truth_setting_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
-    """Populate truth_setting table
-    """
+def _populate_truth_config_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
+    """Populate truth_config table"""
+    import json
     from .mixdb import db_connection
     con = db_connection(location=location, readonly=False, test=test)
-    # Populate truth_setting table
-    truth_settings: list[str] = []
-    for truth_setting in [truth_setting for target_file in target_files
-                          for truth_setting in target_file.truth_settings]:
-        ts = truth_setting.to_json()
-        if ts not in truth_settings:
-            truth_settings.append(ts)
-    con.executemany("INSERT INTO truth_setting (setting) VALUES (?)",
-                    [(item,) for item in truth_settings])
+    # Populate truth_config table
+    truth_configs: list[str] = []
+    for target_file in target_files:
+        for name, config in target_file.truth_configs.items():
+            ts = json.dumps({"name": name} | config.to_dict())
+            if ts not in truth_configs:
+                truth_configs.append(ts)
+    con.executemany(
+        "INSERT INTO truth_config (config) VALUES (?)",
+        [(item,) for item in truth_configs],
+    )
     con.commit()
     con.close()

sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl

sonusai 0.18.8py3-none-any.whl → 0.19.5py3-none-any.whl