sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. sonusai/__init__.py +16 -3
  2. sonusai/audiofe.py +241 -77
  3. sonusai/calc_metric_spenh.py +71 -73
  4. sonusai/config/__init__.py +3 -0
  5. sonusai/config/config.py +61 -0
  6. sonusai/config/config.yml +20 -0
  7. sonusai/config/constants.py +8 -0
  8. sonusai/constants.py +11 -0
  9. sonusai/data/genmixdb.yml +21 -36
  10. sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
  11. sonusai/deprecated/plot.py +4 -5
  12. sonusai/doc/doc.py +4 -4
  13. sonusai/doc.py +11 -4
  14. sonusai/genft.py +43 -45
  15. sonusai/genmetrics.py +25 -19
  16. sonusai/genmix.py +54 -82
  17. sonusai/genmixdb.py +88 -264
  18. sonusai/ir_metric.py +30 -34
  19. sonusai/lsdb.py +41 -48
  20. sonusai/main.py +15 -22
  21. sonusai/metrics/calc_audio_stats.py +4 -293
  22. sonusai/metrics/calc_class_weights.py +4 -4
  23. sonusai/metrics/calc_optimal_thresholds.py +8 -5
  24. sonusai/metrics/calc_pesq.py +2 -2
  25. sonusai/metrics/calc_segsnr_f.py +4 -4
  26. sonusai/metrics/calc_speech.py +25 -13
  27. sonusai/metrics/class_summary.py +7 -7
  28. sonusai/metrics/confusion_matrix_summary.py +5 -5
  29. sonusai/metrics/one_hot.py +4 -4
  30. sonusai/metrics/snr_summary.py +7 -7
  31. sonusai/metrics_summary.py +38 -45
  32. sonusai/mixture/__init__.py +4 -104
  33. sonusai/mixture/audio.py +10 -39
  34. sonusai/mixture/class_balancing.py +103 -0
  35. sonusai/mixture/config.py +251 -271
  36. sonusai/mixture/constants.py +35 -39
  37. sonusai/mixture/data_io.py +25 -36
  38. sonusai/mixture/db_datatypes.py +58 -22
  39. sonusai/mixture/effects.py +386 -0
  40. sonusai/mixture/feature.py +7 -11
  41. sonusai/mixture/generation.py +478 -628
  42. sonusai/mixture/helpers.py +82 -184
  43. sonusai/mixture/ir_delay.py +3 -4
  44. sonusai/mixture/ir_effects.py +77 -0
  45. sonusai/mixture/log_duration_and_sizes.py +6 -12
  46. sonusai/mixture/mixdb.py +910 -729
  47. sonusai/mixture/pad_audio.py +35 -0
  48. sonusai/mixture/resample.py +7 -0
  49. sonusai/mixture/sox_effects.py +195 -0
  50. sonusai/mixture/sox_help.py +650 -0
  51. sonusai/mixture/spectral_mask.py +2 -2
  52. sonusai/mixture/truth.py +17 -15
  53. sonusai/mixture/truth_functions/crm.py +12 -12
  54. sonusai/mixture/truth_functions/energy.py +22 -22
  55. sonusai/mixture/truth_functions/file.py +5 -5
  56. sonusai/mixture/truth_functions/metadata.py +4 -4
  57. sonusai/mixture/truth_functions/metrics.py +4 -4
  58. sonusai/mixture/truth_functions/phoneme.py +3 -3
  59. sonusai/mixture/truth_functions/sed.py +11 -13
  60. sonusai/mixture/truth_functions/target.py +10 -10
  61. sonusai/mkwav.py +26 -29
  62. sonusai/onnx_predict.py +240 -88
  63. sonusai/queries/__init__.py +2 -2
  64. sonusai/queries/queries.py +38 -34
  65. sonusai/speech/librispeech.py +1 -1
  66. sonusai/speech/mcgill.py +1 -1
  67. sonusai/speech/timit.py +2 -2
  68. sonusai/summarize_metric_spenh.py +10 -17
  69. sonusai/utils/__init__.py +7 -1
  70. sonusai/utils/asl_p56.py +2 -2
  71. sonusai/utils/asr.py +2 -2
  72. sonusai/utils/asr_functions/aaware_whisper.py +4 -5
  73. sonusai/utils/choice.py +31 -0
  74. sonusai/utils/compress.py +1 -1
  75. sonusai/utils/dataclass_from_dict.py +19 -1
  76. sonusai/utils/energy_f.py +3 -3
  77. sonusai/utils/evaluate_random_rule.py +15 -0
  78. sonusai/utils/keyboard_interrupt.py +12 -0
  79. sonusai/utils/onnx_utils.py +3 -17
  80. sonusai/utils/print_mixture_details.py +21 -19
  81. sonusai/utils/{temp_seed.py → rand.py} +3 -3
  82. sonusai/utils/read_predict_data.py +2 -2
  83. sonusai/utils/reshape.py +3 -3
  84. sonusai/utils/stratified_shuffle_split.py +3 -3
  85. sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
  86. sonusai/utils/write_audio.py +2 -2
  87. sonusai/vars.py +11 -4
  88. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
  89. sonusai-1.0.2.dist-info/RECORD +138 -0
  90. sonusai/mixture/augmentation.py +0 -444
  91. sonusai/mixture/class_count.py +0 -15
  92. sonusai/mixture/eq_rule_is_valid.py +0 -45
  93. sonusai/mixture/target_class_balancing.py +0 -107
  94. sonusai/mixture/targets.py +0 -175
  95. sonusai-0.20.3.dist-info/RECORD +0 -128
  96. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
  97. {sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0
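Most of the churn in this release comes from collapsing the separate target/noise configuration into a single "sources" dictionary, visible in the sonusai/mixture/config.py diff below: get_target_files and get_noise_files are replaced by get_source_files, truth configs now live under each source category, and a source's "files" entry may be either a list or a string naming another category whose file list it reuses. A minimal standalone sketch of that reference-resolution rule, mirroring the loop added in update_sources (the category names used here are illustrative only, not required by the package):

```python
# Illustrative restatement of the "files" reference handling added in
# update_sources(); the category names below are examples only.
def resolve_file_references(sources: dict) -> dict:
    """Replace string 'files' entries with the file list of the named category."""
    count = 0
    # A bounded loop, as in update_sources(): chase string references until
    # every category holds a concrete list, or assume a circular reference.
    while any(isinstance(s["files"], str) for s in sources.values()) and count < 100:
        count += 1
        for category, source in sources.items():
            if isinstance(source["files"], str):
                sources[category]["files"] = sources[source["files"]]["files"]

    if count == 100:
        raise RuntimeError("Check config sources for circular references")

    return sources


sources = {
    "primary": {"files": ["speech/*.wav"]},
    "noise": {"files": "primary"},  # reuse the primary category's file list
}
print(resolve_file_references(sources)["noise"]["files"])  # ['speech/*.wav']
```

The real update_sources additionally enforces REQUIRED_SOURCES_CATEGORIES, REQUIRED_SOURCE_CONFIGS, and the primary/non-primary parameter whitelists before this resolution step.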
sonusai/mixture/config.py CHANGED
@@ -1,8 +1,7 @@
- from sonusai.mixture.datatypes import ImpulseResponseFile
- from sonusai.mixture.datatypes import NoiseFile
- from sonusai.mixture.datatypes import SpectralMask
- from sonusai.mixture.datatypes import TargetFile
- from sonusai.mixture.datatypes import TruthParameter
+ from ..datatypes import ImpulseResponseFile
+ from ..datatypes import SourceFile
+ from ..datatypes import SpectralMask
+ from ..datatypes import TruthParameter


  def raw_load_config(name: str) -> dict:
@@ -54,7 +53,6 @@ def update_config_from_file(filename: str, given_config: dict) -> dict:

  from .constants import REQUIRED_CONFIGS
  from .constants import VALID_CONFIGS
- from .constants import VALID_NOISE_MIX_MODES

  updated_config = deepcopy(given_config)

@@ -81,6 +79,9 @@ def update_config_from_file(filename: str, given_config: dict) -> dict:
  if key not in updated_config:
  raise AttributeError(f"{filename} is missing required '{key}'")

+ # Validate and update sources
+ updated_config = update_sources(updated_config)
+
  # Validate special cases
  validate_truth_configs(updated_config)
  validate_asr_configs(updated_config)
@@ -89,14 +90,76 @@ def update_config_from_file(filename: str, given_config: dict) -> dict:
  if len(updated_config["spectral_masks"]) == 0:
  updated_config["spectral_masks"] = given_config["spectral_masks"]

- # Check for valid noise_mix_mode
- if updated_config["noise_mix_mode"] not in VALID_NOISE_MIX_MODES:
- nice_list = "\n".join([f" {item}" for item in VALID_NOISE_MIX_MODES])
- raise ValueError(f"{filename} contains invalid noise_mix_mode.\nValid noise mix modes are:\n{nice_list}")
-
  return updated_config


+ def update_sources(given: dict) -> dict:
+ """Validate and update fields in given 'sources'
+
+ :param given: The dictionary of given config
+ """
+ from .constants import REQUIRED_NON_PRIMARY_SOURCE_CONFIGS
+ from .constants import REQUIRED_SOURCE_CONFIGS
+ from .constants import REQUIRED_SOURCES_CATEGORIES
+ from .constants import VALID_NON_PRIMARY_SOURCE_CONFIGS
+ from .constants import VALID_PRIMARY_SOURCE_CONFIGS
+
+ sources = given["sources"]
+
+ for category in REQUIRED_SOURCES_CATEGORIES:
+ if category not in sources:
+ raise AttributeError(f"config sources is missing required '{category}'")
+
+ for category, source in sources.items():
+ for key in REQUIRED_SOURCE_CONFIGS:
+ if key not in source:
+ raise AttributeError(f"config source '{category}' is missing required '{key}'")
+
+ if category == "primary":
+ for key in source:
+ if key not in VALID_PRIMARY_SOURCE_CONFIGS:
+ nice_list = "\n".join([f" {item}" for item in VALID_PRIMARY_SOURCE_CONFIGS])
+ raise AttributeError(
+ f"Invalid source '{category}' config parameter: '{key}'.\nValid sources config parameters are:\n{nice_list}"
+ )
+ else:
+ for key in REQUIRED_NON_PRIMARY_SOURCE_CONFIGS:
+ if key not in source:
+ raise AttributeError(f"config source '{category}' is missing required '{key}'")
+
+ for key in source:
+ if key not in VALID_NON_PRIMARY_SOURCE_CONFIGS:
+ nice_list = "\n".join([f" {item}" for item in VALID_NON_PRIMARY_SOURCE_CONFIGS])
+ raise AttributeError(
+ f"Invalid source '{category}' config parameter: '{key}'.\nValid source config parameters are:\n{nice_list}"
+ )
+
+ files = source["files"]
+
+ if isinstance(files, str) and files in sources and files != category:
+ continue
+
+ if isinstance(files, list):
+ continue
+
+ raise TypeError(
+ f"'file' parameter of config source '{category}' is not a list or a reference to another source"
+ )
+
+ count = 0
+ while any(isinstance(source["files"], str) for source in sources.values()) and count < 100:
+ count += 1
+ for category, source in sources.items():
+ files = source["files"]
+ if isinstance(files, str):
+ given["sources"][category]["files"] = sources[files]["files"]
+
+ if count == 100:
+ raise RuntimeError("Check config sources for circular references")
+
+ return given
+
+
  def validate_truth_configs(given: dict) -> None:
  """Validate fields in given 'truth_configs'

@@ -104,27 +167,31 @@ def validate_truth_configs(given: dict) -> None:
  """
  from copy import deepcopy

- from sonusai.mixture import truth_functions
-
+ from . import truth_functions
  from .constants import REQUIRED_TRUTH_CONFIGS

- if "truth_configs" not in given:
- raise AttributeError("config is missing required 'truth_configs'")
+ sources = given["sources"]

- truth_configs = given["truth_configs"]
- if len(truth_configs) == 0:
- raise ValueError("'truth_configs' in config is empty")
+ for category, source in sources.items():
+ if "truth_configs" not in source:
+ continue

- for name, truth_config in truth_configs.items():
- for key in REQUIRED_TRUTH_CONFIGS:
- if key not in truth_config:
- raise AttributeError(f"'{name}' in truth_configs is missing required '{key}'")
+ truth_configs = source["truth_configs"]
+ if len(truth_configs) == 0:
+ raise ValueError(f"'truth_configs' in config source '{category}' is empty")

- optional_config = deepcopy(truth_config)
- for key in REQUIRED_TRUTH_CONFIGS:
- del optional_config[key]
+ for truth_name, truth_config in truth_configs.items():
+ for k in REQUIRED_TRUTH_CONFIGS:
+ if k not in truth_config:
+ raise AttributeError(
+ f"'{truth_name}' in source '{category}' truth_configs is missing required '{k}'"
+ )
+
+ optional_config = deepcopy(truth_config)
+ for k in REQUIRED_TRUTH_CONFIGS:
+ del optional_config[k]

- getattr(truth_functions, truth_config["function"] + "_validate")(optional_config)
+ getattr(truth_functions, truth_config["function"] + "_validate")(optional_config)


  def validate_asr_configs(given: dict) -> None:
@@ -132,8 +199,7 @@ def validate_asr_configs(given: dict) -> None:

  :param given: The dictionary of given config
  """
- from sonusai.utils import validate_asr
-
+ from ..utils.asr import validate_asr
  from .constants import REQUIRED_ASR_CONFIGS

  if "asr_configs" not in given:
@@ -209,69 +275,80 @@ def update_config_from_hierarchy(root: str, leaf: str, config: dict) -> dict:
  return new_config


- def get_target_files(config: dict, show_progress: bool = False) -> list[TargetFile]:
- """Get the list of target files from a config
+ def get_source_files(config: dict, show_progress: bool = False) -> list[SourceFile]:
+ """Get the list of source files from a config

  :param config: Config dictionary
  :param show_progress: Show progress bar
- :return: List of target files
+ :return: List of source files
  """
  from itertools import chain

- from sonusai.utils import dataclass_from_dict
- from sonusai.utils import par_track
- from sonusai.utils import track
+ from ..utils.parallel import par_track
+ from ..utils.parallel import track

- from .datatypes import TargetFile
+ sources = config["sources"]
+ if not isinstance(sources, dict) and not all(isinstance(source, dict) for source in sources):
+ raise TypeError("'sources' must be a dictionary of dictionaries")
+
+ if "primary" not in sources:
+ raise AttributeError("'primary' is missing in 'sources'")

  class_indices = config["class_indices"]
  if not isinstance(class_indices, list):
  class_indices = [class_indices]

- target_files = list(
- chain.from_iterable(
- [
- append_target_files(
- entry=entry,
- class_indices=class_indices,
- truth_configs=config["truth_configs"],
- level_type=config["target_level_type"],
- )
- for entry in config["targets"]
- ]
+ level_type = config["level_type"]
+
+ source_files: list[SourceFile] = []
+ for category in sources:
+ source_files.extend(
+ chain.from_iterable(
+ [
+ append_source_files(
+ category=category,
+ entry=entry,
+ class_indices=class_indices,
+ truth_configs=sources[category].get("truth_configs", []),
+ level_type=level_type,
+ )
+ for entry in sources[category]["files"]
+ ]
+ )
  )
- )

- progress = track(total=len(target_files), disable=not show_progress)
- target_files = par_track(_get_num_samples, target_files, progress=progress)
+ progress = track(total=len(source_files), disable=not show_progress)
+ source_files = par_track(_get_num_samples, source_files, progress=progress)
  progress.close()

  num_classes = config["num_classes"]
- for target_file in target_files:
- if any(class_index < 0 for class_index in target_file["class_indices"]):
+ for source_file in source_files:
+ if any(class_index < 0 for class_index in source_file.class_indices):
  raise ValueError("class indices must contain only positive elements")

- if any(class_index > num_classes for class_index in target_file["class_indices"]):
+ if any(class_index > num_classes for class_index in source_file.class_indices):
  raise ValueError(f"class index elements must not be greater than {num_classes}")

- return dataclass_from_dict(list[TargetFile], target_files)
+ return source_files


- def append_target_files(
- entry: dict | str,
+ def append_source_files(
+ category: str,
+ entry: dict,
  class_indices: list[int],
  truth_configs: dict,
  level_type: str,
  tokens: dict | None = None,
- ) -> list[dict]:
- """Process target files list and append as needed
+ ) -> list[SourceFile]:
+ """Process source files list and append as needed

- :param entry: Target file entry to append to the list
+ :param category: Source file category name
+ :param entry: Source file entry to append to the list
  :param class_indices: Class indices
  :param truth_configs: Truth configs
- :param level_type: Target level type
+ :param level_type: Level type
  :param tokens: Tokens used for variable expansion
- :return: List of target files
+ :return: List of source files
  """
  from copy import deepcopy
  from glob import glob
@@ -282,41 +359,39 @@ def append_target_files(
  from os.path import join
  from os.path import splitext

- from sonusai.utils import dataclass_from_dict
-
+ from ..datatypes import TruthConfig
+ from ..utils.dataclass_from_dict import dataclass_from_dict
+ from ..utils.tokenized_shell_vars import tokenized_expand
+ from ..utils.tokenized_shell_vars import tokenized_replace
  from .audio import validate_input_file
  from .constants import REQUIRED_TRUTH_CONFIGS
- from .datatypes import TruthConfig
- from .tokenized_shell_vars import tokenized_expand
- from .tokenized_shell_vars import tokenized_replace

  if tokens is None:
  tokens = {}

  truth_configs_merged = deepcopy(truth_configs)
- if isinstance(entry, dict):
- if "name" in entry:
- in_name = entry["name"]
- else:
- raise AttributeError("Target list contained record without name")
-
- if "class_indices" in entry:
- if isinstance(entry["class_indices"], list):
- class_indices = entry["class_indices"]
- else:
- class_indices = [entry["class_indices"]]
-
- truth_configs_override = entry.get("truth_configs", {})
- for key in truth_configs_override:
- if key not in truth_configs:
- raise AttributeError(
- f"Truth config '{key}' override specified for {entry['name']} is not defined at top level"
- )
- if key in truth_configs_override:
- truth_configs_merged[key] |= truth_configs_override[key]
- level_type = entry.get("level_type", level_type)
- else:
- in_name = entry
+
+ if not isinstance(entry, dict):
+ raise TypeError("'entry' must be a dictionary")
+
+ in_name = entry.get("name")
+ if in_name is None:
+ raise KeyError("Source file list contained record without name")
+
+ class_indices = entry.get("class_indices", class_indices)
+ if not isinstance(class_indices, list):
+ class_indices = [class_indices]
+
+ truth_configs_override = entry.get("truth_configs", {})
+ for key in truth_configs_override:
+ if key not in truth_configs:
+ raise AttributeError(
+ f"Truth config '{key}' override specified for {entry['name']} is not defined at top level"
+ )
+ if key in truth_configs_override:
+ truth_configs_merged[key] |= truth_configs_override[key]
+
+ level_type = entry.get("level_type", level_type)

  in_name, new_tokens = tokenized_expand(in_name)
  tokens.update(new_tokens)
@@ -324,7 +399,7 @@ def append_target_files(
  if not names:
  raise OSError(f"Could not find {in_name}. Make sure path exists")

- target_files: list[dict] = []
+ source_files: list[SourceFile] = []
  for name in names:
  ext = splitext(name)[1].lower()
  dir_name = dirname(name)
@@ -333,9 +408,10 @@
  child = file
  if not isabs(child):
  child = join(dir_name, child)
- target_files.extend(
- append_target_files(
- entry=child,
+ source_files.extend(
+ append_source_files(
+ category=category,
+ entry={"name": child},
  class_indices=class_indices,
  truth_configs=truth_configs_merged,
  level_type=level_type,
@@ -355,41 +431,26 @@ def append_target_files(
  tokens.update(new_tokens)
  if not isabs(child):
  child = join(dir_name, child)
- target_files.extend(
- append_target_files(
- entry=child,
+ source_files.extend(
+ append_source_files(
+ category=category,
+ entry={"name": child},
  class_indices=class_indices,
  truth_configs=truth_configs_merged,
  level_type=level_type,
  tokens=tokens,
  )
  )
- elif ext == ".yml":
- try:
- yml_config = raw_load_config(name)
-
- if "targets" in yml_config:
- for record in yml_config["targets"]:
- target_files.extend(
- append_target_files(
- entry=record,
- class_indices=class_indices,
- truth_configs=truth_configs_merged,
- level_type=level_type,
- tokens=tokens,
- )
- )
- except Exception as e:
- raise OSError(f"Error processing {name}: {e}") from e

  else:
  validate_input_file(name)
- target_file: dict = {
- "expanded_name": name,
- "name": tokenized_replace(name, tokens),
- "class_indices": class_indices,
- "level_type": level_type,
- "truth_configs": {},
- }
+ source_file = SourceFile(
+ category=category,
+ name=tokenized_replace(name, tokens),
+ samples=0,
+ class_indices=class_indices,
+ level_type=level_type,
+ truth_configs={},
+ )
  if len(truth_configs_merged) > 0:
  for tc_key, tc_value in truth_configs_merged.items():
@@ -398,145 +459,58 @@ def append_target_files(
  truth_config[key] = config[key]
  del config[key]
  truth_config["config"] = config
- target_file["truth_configs"][tc_key] = dataclass_from_dict(TruthConfig, truth_config)
- for tc_key in target_file["truth_configs"]:
+ source_file.truth_configs[tc_key] = dataclass_from_dict(TruthConfig, truth_config)
+ for tc_key in source_file.truth_configs:
  if (
  "function" in truth_configs_merged[tc_key]
  and truth_configs_merged[tc_key]["function"] == "file"
  ):
- truth_configs_merged[tc_key]["file"] = splitext(target_file["name"])[0] + ".h5"
- target_files.append(target_file)
- except Exception as e:
- raise OSError(f"Error processing {name}: {e}") from e
-
- return target_files
-
-
- def get_noise_files(config: dict, show_progress: bool = False) -> list[NoiseFile]:
- """Get the list of noise files from a config
-
- :param config: Config dictionary
- :param show_progress: Show progress bar
- :return: List of noise file
- """
- from itertools import chain
-
- from sonusai.utils import dataclass_from_dict
- from sonusai.utils import par_track
- from sonusai.utils import track
-
- from .datatypes import NoiseFile
-
- noise_files = list(chain.from_iterable([append_noise_files(entry=entry) for entry in config["noises"]]))
-
- progress = track(total=len(noise_files), disable=not show_progress)
- noise_files = par_track(_get_num_samples, noise_files, progress=progress)
- progress.close()
-
- return dataclass_from_dict(list[NoiseFile], noise_files)
-
-
- def append_noise_files(entry: dict | str, tokens: dict | None = None) -> list[dict]:
- """Process noise files list and append as needed
-
- :param entry: Noise file entry to append to the list
- :param tokens: Tokens used for variable expansion
- :return: List of noise files
- """
- from glob import glob
- from os import listdir
- from os.path import dirname
- from os.path import isabs
- from os.path import isdir
- from os.path import join
- from os.path import splitext
-
- from .audio import validate_input_file
- from .tokenized_shell_vars import tokenized_expand
- from .tokenized_shell_vars import tokenized_replace
-
- if tokens is None:
- tokens = {}
-
- if isinstance(entry, dict):
- if "name" in entry:
- in_name = entry["name"]
- else:
- raise AttributeError("Noise list contained record without name")
- else:
- in_name = entry
-
- in_name, new_tokens = tokenized_expand(in_name)
- tokens.update(new_tokens)
- names = sorted(glob(in_name))
- if not names:
- raise OSError(f"Could not find {in_name}. Make sure path exists")
-
- noise_files: list[dict] = []
- for name in names:
- ext = splitext(name)[1].lower()
- dir_name = dirname(name)
- if isdir(name):
- for file in listdir(name):
- child = file
- if not isabs(child):
- child = join(dir_name, child)
- noise_files.extend(append_noise_files(entry=child, tokens=tokens))
- else:
- try:
- if ext == ".txt":
- with open(file=name) as txt_file:
- for line in txt_file:
- # strip comments
- child = line.partition("#")[0]
- child = child.rstrip()
- if child:
- child, new_tokens = tokenized_expand(child)
- tokens.update(new_tokens)
- if not isabs(child):
- child = join(dir_name, child)
- noise_files.extend(append_noise_files(entry=child, tokens=tokens))
- elif ext == ".yml":
- try:
- yml_config = raw_load_config(name)
-
- if "noises" in yml_config:
- for record in yml_config["noises"]:
- noise_files.extend(append_noise_files(entry=record, tokens=tokens))
- except Exception as e:
- raise OSError(f"Error processing {name}: {e}") from e
- else:
- validate_input_file(name)
- noise_file: dict = {
- "expanded_name": name,
- "name": tokenized_replace(name, tokens),
- }
- noise_files.append(noise_file)
+ truth_configs_merged[tc_key]["file"] = splitext(source_file.name)[0] + ".h5"
+ source_files.append(source_file)
  except Exception as e:
  raise OSError(f"Error processing {name}: {e}") from e

- return noise_files
+ return source_files


- def get_impulse_response_files(config: dict) -> list[ImpulseResponseFile]:
+ def get_ir_files(config: dict, show_progress: bool = False) -> list[ImpulseResponseFile]:
  """Get the list of impulse response files from a config

  :param config: Config dictionary
+ :param show_progress: Show progress bar
  :return: List of impulse response files
  """
  from itertools import chain

- return list(
+ from ..utils.parallel import par_track
+ from ..utils.parallel import track
+
+ ir_files = list(
  chain.from_iterable(
  [
- append_impulse_response_files(entry=ImpulseResponseFile(entry["name"], entry.get("tags", []), 0))
+ append_ir_files(
+ entry=ImpulseResponseFile(
+ name=entry["name"],
+ tags=entry.get("tags", []),
+ delay=entry.get("delay", "auto"),
+ )
+ )
  for entry in config["impulse_responses"]
  ]
  )
  )

+ if len(ir_files) == 0:
+ return []
+
+ progress = track(total=len(ir_files), disable=not show_progress)
+ ir_files = par_track(_get_ir_delay, ir_files, progress=progress)
+ progress.close()
+
+ return ir_files
+

- def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | None = None) -> list[ImpulseResponseFile]:
+ def append_ir_files(entry: ImpulseResponseFile, tokens: dict | None = None) -> list[ImpulseResponseFile]:
  """Process impulse response files list and append as needed

  :param entry: Impulse response file entry to append to the list
@@ -551,21 +525,20 @@ def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | Non
  from os.path import join
  from os.path import splitext

+ from ..utils.tokenized_shell_vars import tokenized_expand
+ from ..utils.tokenized_shell_vars import tokenized_replace
  from .audio import validate_input_file
- from .ir_delay import get_impulse_response_delay
- from .tokenized_shell_vars import tokenized_expand
- from .tokenized_shell_vars import tokenized_replace

  if tokens is None:
  tokens = {}

- in_name, new_tokens = tokenized_expand(entry.file)
+ in_name, new_tokens = tokenized_expand(entry.name)
  tokens.update(new_tokens)
  names = sorted(glob(in_name))
  if not names:
  raise OSError(f"Could not find {in_name}. Make sure path exists")

- impulse_response_files: list[ImpulseResponseFile] = []
+ ir_files: list[ImpulseResponseFile] = []
  for name in names:
  ext = splitext(name)[1].lower()
  dir_name = dirname(name)
@@ -573,8 +546,8 @@ def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | Non
  for file in listdir(name):
  if not isabs(file):
  file = join(dir_name, file)
- child = ImpulseResponseFile(file, entry.tags, get_impulse_response_delay(file))
- impulse_response_files.extend(append_impulse_response_files(entry=child, tokens=tokens))
+ child = ImpulseResponseFile(file, entry.tags, entry.delay)
+ ir_files.extend(append_ir_files(entry=child, tokens=tokens))
  else:
  try:
  if ext == ".txt":
@@ -588,30 +561,24 @@ def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | Non
  tokens.update(new_tokens)
  if not isabs(file):
  file = join(dir_name, file)
- child = ImpulseResponseFile(file, entry.tags, get_impulse_response_delay(file))
- impulse_response_files.extend(append_impulse_response_files(entry=child, tokens=tokens))
+ child = ImpulseResponseFile(file, entry.tags, entry.delay)
+ ir_files.extend(append_ir_files(entry=child, tokens=tokens))
  elif ext == ".yml":
  try:
  yml_config = raw_load_config(name)

  if "impulse_responses" in yml_config:
  for record in yml_config["impulse_responses"]:
- impulse_response_files.extend(
- append_impulse_response_files(entry=record, tokens=tokens)
- )
+ ir_files.extend(append_ir_files(entry=record, tokens=tokens))
  except Exception as e:
  raise OSError(f"Error processing {name}: {e}") from e
  else:
  validate_input_file(name)
- impulse_response_files.append(
- ImpulseResponseFile(
- tokenized_replace(name, tokens), entry.tags, get_impulse_response_delay(name)
- )
- )
+ ir_files.append(ImpulseResponseFile(tokenized_replace(name, tokens), entry.tags, entry.delay))
  except Exception as e:
  raise OSError(f"Error processing {name}: {e}") from e

- return impulse_response_files
+ return ir_files


  def get_spectral_masks(config: dict) -> list[SpectralMask]:
@@ -620,10 +587,10 @@ def get_spectral_masks(config: dict) -> list[SpectralMask]:
  :param config: Config dictionary
  :return: List of spectral masks
  """
- from sonusai.utils import dataclass_from_dict
+ from ..utils.dataclass_from_dict import list_dataclass_from_dict

  try:
- return dataclass_from_dict(list[SpectralMask], config["spectral_masks"])
+ return list_dataclass_from_dict(list[SpectralMask], config["spectral_masks"])
  except Exception as e:
  raise ValueError(f"Error in spectral_masks: {e}") from e

@@ -636,30 +603,43 @@ def get_truth_parameters(config: dict) -> list[TruthParameter]:
  """
  from copy import deepcopy

- from sonusai.mixture import truth_functions
-
+ from . import truth_functions
  from .constants import REQUIRED_TRUTH_CONFIGS
- from .datatypes import TruthParameter

  truth_parameters: list[TruthParameter] = []
- for name, truth_config in config["truth_configs"].items():
- optional_config = deepcopy(truth_config)
- for key in REQUIRED_TRUTH_CONFIGS:
- del optional_config[key]
-
- parameters = getattr(truth_functions, truth_config["function"] + "_parameters")(
- config["feature"],
- config["num_classes"],
- optional_config,
- )
- truth_parameters.append(TruthParameter(name, parameters))
+ for category, source_config in config["sources"].items():
+ if "truth_configs" in source_config:
+ for truth_name, truth_config in source_config["truth_configs"].items():
+ optional_config = deepcopy(truth_config)
+ for key in REQUIRED_TRUTH_CONFIGS:
+ del optional_config[key]
+
+ parameters = getattr(truth_functions, truth_config["function"] + "_parameters")(
+ config["feature"],
+ config["num_classes"],
+ optional_config,
+ )
+ truth_parameters.append(TruthParameter(category, truth_name, parameters))

  return truth_parameters


- def _get_num_samples(entry: dict) -> dict:
+ def _get_num_samples(entry: SourceFile) -> SourceFile:
  from .audio import get_num_samples

- entry["samples"] = get_num_samples(entry["expanded_name"])
- del entry["expanded_name"]
+ entry.samples = get_num_samples(entry.name)
+ return entry
+
+
+ def _get_ir_delay(entry: ImpulseResponseFile) -> ImpulseResponseFile:
+ from .ir_delay import get_ir_delay
+
+ if entry.delay == "auto":
+ entry.delay = get_ir_delay(entry.name)
+ else:
+ try:
+ entry.delay = int(entry.delay)
+ except ValueError as e:
+ raise ValueError(f"Invalid impulse response delay: {entry.delay}") from e
+
  return entry
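Impulse response handling also changes in this file: ImpulseResponseFile entries now carry a delay field read from the config (delay: auto by default), and get_ir_files resolves it after globbing via _get_ir_delay, measuring the delay only when it is "auto" and otherwise requiring an integer. A self-contained sketch of that resolution rule, using a stand-in dataclass and an injected measurement function (the package's own datatype and ir_delay.get_ir_delay are not reproduced here):

```python
from dataclasses import dataclass, field


@dataclass
class IrEntry:  # stand-in for sonusai.datatypes.ImpulseResponseFile
    name: str
    tags: list[str] = field(default_factory=list)
    delay: str | int = "auto"


def resolve_delay(entry: IrEntry, measure) -> IrEntry:
    """'auto' means measure the delay from the file; anything else must parse as int."""
    if entry.delay == "auto":
        entry.delay = measure(entry.name)  # the real code calls ir_delay.get_ir_delay()
    else:
        try:
            entry.delay = int(entry.delay)
        except ValueError as e:
            raise ValueError(f"Invalid impulse response delay: {entry.delay}") from e
    return entry


# Example: an explicit delay given as a string in the config is coerced to int.
print(resolve_delay(IrEntry("ir.wav", delay="12"), measure=len).delay)  # 12
```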