PyPI - sonusai - Versions diffs - 0.18.9__py3-none-any.whl → 0.19.5__py3-none-any.whl - Mend

sonusai 0.18.9 (py3-none-any.whl) → 0.19.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

sonusai/__init__.py +20 -29
sonusai/aawscd_probwrite.py +18 -18
sonusai/audiofe.py +93 -80
sonusai/calc_metric_spenh.py +395 -321
sonusai/data/genmixdb.yml +5 -11
sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
sonusai/{plot.py → deprecated/plot.py} +177 -131
sonusai/{tplot.py → deprecated/tplot.py} +124 -102
sonusai/doc/__init__.py +1 -1
sonusai/doc/doc.py +112 -177
sonusai/doc.py +10 -10
sonusai/genft.py +93 -77
sonusai/genmetrics.py +59 -46
sonusai/genmix.py +116 -104
sonusai/genmixdb.py +194 -153
sonusai/lsdb.py +56 -66
sonusai/main.py +23 -20
sonusai/metrics/__init__.py +2 -0
sonusai/metrics/calc_audio_stats.py +29 -24
sonusai/metrics/calc_class_weights.py +7 -7
sonusai/metrics/calc_optimal_thresholds.py +5 -7
sonusai/metrics/calc_pcm.py +3 -3
sonusai/metrics/calc_pesq.py +10 -7
sonusai/metrics/calc_phase_distance.py +3 -3
sonusai/metrics/calc_sa_sdr.py +10 -8
sonusai/metrics/calc_segsnr_f.py +15 -17
sonusai/metrics/calc_speech.py +105 -47
sonusai/metrics/calc_wer.py +35 -32
sonusai/metrics/calc_wsdr.py +10 -7
sonusai/metrics/class_summary.py +30 -27
sonusai/metrics/confusion_matrix_summary.py +25 -22
sonusai/metrics/one_hot.py +91 -57
sonusai/metrics/snr_summary.py +53 -46
sonusai/mixture/__init__.py +19 -14
sonusai/mixture/audio.py +4 -6
sonusai/mixture/augmentation.py +37 -43
sonusai/mixture/class_count.py +5 -14
sonusai/mixture/config.py +292 -225
sonusai/mixture/constants.py +41 -30
sonusai/mixture/data_io.py +155 -0
sonusai/mixture/datatypes.py +111 -108
sonusai/mixture/db_datatypes.py +54 -70
sonusai/mixture/eq_rule_is_valid.py +6 -9
sonusai/mixture/feature.py +40 -38
sonusai/mixture/generation.py +522 -389
sonusai/mixture/helpers.py +217 -272
sonusai/mixture/log_duration_and_sizes.py +16 -13
sonusai/mixture/mixdb.py +669 -477
sonusai/mixture/soundfile_audio.py +12 -17
sonusai/mixture/sox_audio.py +91 -112
sonusai/mixture/sox_augmentation.py +8 -9
sonusai/mixture/spectral_mask.py +4 -6
sonusai/mixture/target_class_balancing.py +41 -36
sonusai/mixture/targets.py +69 -67
sonusai/mixture/tokenized_shell_vars.py +23 -23
sonusai/mixture/torchaudio_audio.py +14 -15
sonusai/mixture/torchaudio_augmentation.py +23 -27
sonusai/mixture/truth.py +48 -26
sonusai/mixture/truth_functions/__init__.py +26 -0
sonusai/mixture/truth_functions/crm.py +56 -38
sonusai/mixture/truth_functions/datatypes.py +37 -0
sonusai/mixture/truth_functions/energy.py +85 -59
sonusai/mixture/truth_functions/file.py +30 -30
sonusai/mixture/truth_functions/phoneme.py +14 -7
sonusai/mixture/truth_functions/sed.py +71 -45
sonusai/mixture/truth_functions/target.py +69 -106
sonusai/mkwav.py +52 -85
sonusai/onnx_predict.py +46 -43
sonusai/queries/__init__.py +3 -1
sonusai/queries/queries.py +100 -59
sonusai/speech/__init__.py +2 -0
sonusai/speech/l2arctic.py +24 -23
sonusai/speech/librispeech.py +16 -17
sonusai/speech/mcgill.py +22 -21
sonusai/speech/textgrid.py +32 -25
sonusai/speech/timit.py +45 -42
sonusai/speech/vctk.py +14 -13
sonusai/speech/voxceleb.py +26 -20
sonusai/summarize_metric_spenh.py +11 -10
sonusai/utils/__init__.py +4 -3
sonusai/utils/asl_p56.py +1 -1
sonusai/utils/asr.py +37 -17
sonusai/utils/asr_functions/__init__.py +2 -0
sonusai/utils/asr_functions/aaware_whisper.py +18 -12
sonusai/utils/audio_devices.py +12 -12
sonusai/utils/braced_glob.py +6 -8
sonusai/utils/calculate_input_shape.py +1 -4
sonusai/utils/compress.py +2 -2
sonusai/utils/convert_string_to_number.py +1 -3
sonusai/utils/create_timestamp.py +1 -1
sonusai/utils/create_ts_name.py +2 -2
sonusai/utils/dataclass_from_dict.py +1 -1
sonusai/utils/docstring.py +6 -6
sonusai/utils/energy_f.py +9 -7
sonusai/utils/engineering_number.py +56 -54
sonusai/utils/get_label_names.py +8 -10
sonusai/utils/human_readable_size.py +2 -2
sonusai/utils/model_utils.py +3 -5
sonusai/utils/numeric_conversion.py +2 -4
sonusai/utils/onnx_utils.py +43 -32
sonusai/utils/parallel.py +40 -27
sonusai/utils/print_mixture_details.py +25 -22
sonusai/utils/ranges.py +12 -12
sonusai/utils/read_predict_data.py +11 -9
sonusai/utils/reshape.py +19 -26
sonusai/utils/seconds_to_hms.py +1 -1
sonusai/utils/stacked_complex.py +8 -16
sonusai/utils/stratified_shuffle_split.py +29 -27
sonusai/utils/write_audio.py +2 -2
sonusai/utils/yes_or_no.py +3 -3
sonusai/vars.py +14 -14
{sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
sonusai-0.19.5.dist-info/RECORD +125 -0
{sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
sonusai/mixture/truth_functions/data.py +0 -58
sonusai/utils/read_mixture_data.py +0 -14
sonusai-0.18.9.dist-info/RECORD +0 -125
{sonusai-0.18.9.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0

sonusai/mixture/config.py CHANGED Viewed

@@ -1,7 +1,9 @@
+from sonusai.mixture.datatypes import ImpulseResponseFile
 from sonusai.mixture.datatypes import ImpulseResponseFiles
 from sonusai.mixture.datatypes import NoiseFiles
 from sonusai.mixture.datatypes import SpectralMasks
 from sonusai.mixture.datatypes import TargetFiles
+from sonusai.mixture.datatypes import TruthParameters
 def raw_load_config(name: str) -> dict:
@@ -12,7 +14,7 @@ def raw_load_config(name: str) -> dict:
     """
     import yaml
-    with open(file=name, mode='r') as f:
+    with open(file=name) as f:
         config = yaml.safe_load(f)
     return config
@@ -23,13 +25,12 @@ def get_default_config() -> dict:
     :return: Dictionary of default config data
     """
-    from sonusai import SonusAIError
     from .constants import DEFAULT_CONFIG
     try:
         return raw_load_config(DEFAULT_CONFIG)
     except Exception as e:
-        raise SonusAIError(f'Error loading default config: {e}')
+        raise OSError(f"Error loading default config: {e}") from e
 def load_config(name: str) -> dict:
@@ -40,125 +41,115 @@ def load_config(name: str) -> dict:
     """
     from os.path import join
-    return update_config_from_file(name=join(name, 'config.yml'), config=get_default_config())
+    return update_config_from_file(filename=join(name, "config.yml"), given_config=get_default_config())
-def update_config_from_file(name: str, config: dict) -> dict:
-    """Update the given config with the config in the YAML file
+def update_config_from_file(filename: str, given_config: dict) -> dict:
+    """Update the given config with the config in the specified YAML file
-    :param name: File name
-    :param config: Config dictionary to update
+    :param filename: File name
+    :param given_config: Config dictionary to update
     :return: Updated config dictionary
     """
     from copy import deepcopy
-    from sonusai import SonusAIError
     from .constants import REQUIRED_CONFIGS
     from .constants import VALID_CONFIGS
     from .constants import VALID_NOISE_MIX_MODES
-    updated_config = deepcopy(config)
+    updated_config = deepcopy(given_config)
     try:
-        new_config = raw_load_config(name)
+        file_config = raw_load_config(filename)
     except Exception as e:
-        raise SonusAIError(f'Error loading config from {name}: {e}')
+        raise OSError(f"Error loading config from {filename}: {e}") from e
     # Check for unrecognized keys
-    for key in new_config:
+    for key in file_config:
         if key not in VALID_CONFIGS:
-            nice_list = '\n'.join([f'  {item}' for item in VALID_CONFIGS])
-            raise SonusAIError(f'Invalid config parameter in {name}: {key}.\n'
-                               f'Valid config parameters are:\n{nice_list}')
+            nice_list = "\n".join([f"  {item}" for item in VALID_CONFIGS])
+            raise AttributeError(
+                f"Invalid config parameter in {filename}: {key}.\nValid config parameters are:\n{nice_list}"
+            )
     # Use default config as base and overwrite with given config keys as found
     for key in updated_config:
-        if key in new_config:
-            if key not in ['truth_settings']:
-                updated_config[key] = new_config[key]
-    # Handle 'truth_settings' special case
-    if 'truth_settings' in new_config:
-        updated_config['truth_settings'] = deepcopy(new_config['truth_settings'])
-    if not isinstance(updated_config['truth_settings'], list):
-        updated_config['truth_settings'] = [updated_config['truth_settings']]
-    default = deepcopy(config['truth_settings'])
-    if not isinstance(default, list):
-        default = [default]
-    updated_config['truth_settings'] = update_truth_settings(updated_config['truth_settings'], default)
-    # Handle 'asr_configs' special case
-    if 'asr_configs' in updated_config:
-        asr_configs = {}
-        for asr_config in updated_config['asr_configs']:
-            asr_name = asr_config.get('name', None)
-            asr_engine = asr_config.get('engine', None)
-            if asr_name is None or asr_engine is None:
-                raise SonusAIError(f'Invalid config parameter in {name}: asr_configs.\n'
-                                   f'asr_configs must contain both name and engine.')
-            del asr_config['name']
-            asr_configs[asr_name] = asr_config
-        updated_config['asr_configs'] = asr_configs
+        if key in file_config:
+            updated_config[key] = file_config[key]
     # Check for required keys
     for key in REQUIRED_CONFIGS:
         if key not in updated_config:
-            raise SonusAIError(f'Missing required config in {name}: {key}')
+            raise AttributeError(f"{filename} is missing required '{key}'")
+    # Validate special cases
+    validate_truth_configs(updated_config)
+    validate_asr_configs(updated_config)
     # Check for non-empty spectral masks
-    if len(updated_config['spectral_masks']) == 0:
-        updated_config['spectral_masks'] = config['spectral_masks']
+    if len(updated_config["spectral_masks"]) == 0:
+        updated_config["spectral_masks"] = given_config["spectral_masks"]
     # Check for valid noise_mix_mode
-    if updated_config['noise_mix_mode'] not in VALID_NOISE_MIX_MODES:
-        nice_list = '\n'.join([f'  {item}' for item in VALID_NOISE_MIX_MODES])
-        raise SonusAIError(f'Invalid noise_mix_mode in {name}.\n'
-                           f'Valid noise mix modes are:\n{nice_list}')
+    if updated_config["noise_mix_mode"] not in VALID_NOISE_MIX_MODES:
+        nice_list = "\n".join([f"  {item}" for item in VALID_NOISE_MIX_MODES])
+        raise ValueError(f"{filename} contains invalid noise_mix_mode.\nValid noise mix modes are:\n{nice_list}")
     return updated_config
-def update_truth_settings(given: list[dict] | dict, default: list[dict] = None) -> list[dict]:
-    """Update missing fields in given 'truth_settings' with default values
+def validate_truth_configs(given: dict) -> None:
+    """Validate fields in given 'truth_configs'
-    :param given: The dictionary of given truth settings
-    :param default: The dictionary of default truth settings
-    :return: Updated dictionary of truth settings
+    :param given: The dictionary of given config
     """
     from copy import deepcopy
-    from sonusai import SonusAIError
-    from .constants import VALID_TRUTH_SETTINGS
+    from sonusai.mixture import truth_functions
-    if isinstance(given, list):
-        truth_settings = deepcopy(given)
-    else:
-        truth_settings = [deepcopy(given)]
+    from .constants import REQUIRED_TRUTH_CONFIGS
-    if default is not None and len(truth_settings) != len(default):
-        raise SonusAIError(f'Length of given does not match default')
+    if "truth_configs" not in given:
+        raise AttributeError("config is missing required 'truth_configs'")
-    for n in range(len(truth_settings)):
-        for key in truth_settings[n]:
-            if key not in VALID_TRUTH_SETTINGS:
-                nice_list = '\n'.join([f'  {item}' for item in VALID_TRUTH_SETTINGS])
-                raise SonusAIError(f'Invalid truth_settings: {key}.\nValid truth_settings are:\n{nice_list}')
+    truth_configs = given["truth_configs"]
+    if len(truth_configs) == 0:
+        raise ValueError("'truth_configs' in config is empty")
-        for key in VALID_TRUTH_SETTINGS:
-            if key not in truth_settings[n]:
-                if default is not None and key in default[n]:
-                    truth_settings[n][key] = default[n][key]
-                else:
-                    raise SonusAIError(f'Missing required truth_settings: {key}')
+    for name, truth_config in truth_configs.items():
+        for key in REQUIRED_TRUTH_CONFIGS:
+            if key not in truth_config:
+                raise AttributeError(f"'{name}' in truth_configs is missing required '{key}'")
+        optional_config = deepcopy(truth_config)
+        for key in REQUIRED_TRUTH_CONFIGS:
+            del optional_config[key]
+        getattr(truth_functions, truth_config["function"] + "_validate")(optional_config)
+def validate_asr_configs(given: dict) -> None:
+    """Validate fields in given 'asr_config'
+    :param given: The dictionary of given config
+    """
+    from sonusai.utils import validate_asr
+    from .constants import REQUIRED_ASR_CONFIGS
+    if "asr_configs" not in given:
+        raise AttributeError("config is missing required 'asr_configs'")
+    asr_configs = given["asr_configs"]
-    for truth_setting in truth_settings:
-        if not isinstance(truth_setting['index'], list):
-            truth_setting['index'] = [truth_setting['index']]
+    for name, asr_config in asr_configs.items():
+        for key in REQUIRED_ASR_CONFIGS:
+            if key not in asr_config:
+                raise AttributeError(f"'{name}' in asr_configs is missing required '{key}'")
-    return truth_settings
+        engine = asr_config["engine"]
+        config = {x: asr_config[x] for x in asr_config if x != "engine"}
+        validate_asr(engine, **config)
 def get_hierarchical_config_files(root: str, leaf: str) -> list[str]:
@@ -171,25 +162,23 @@ def get_hierarchical_config_files(root: str, leaf: str) -> list[str]:
     import os
     from pathlib import Path
-    from sonusai import SonusAIError
-    config_file = 'config.yml'
+    config_file = "config.yml"
     root_path = Path(os.path.abspath(root))
     if not root_path.is_dir():
-        raise SonusAIError(f'Given root, {root_path}, is not a directory.')
+        raise OSError(f"Given root, {root_path}, is not a directory.")
     leaf_path = Path(os.path.abspath(leaf))
     if not leaf_path.is_dir():
-        raise SonusAIError(f'Given leaf, {leaf_path}, is not a directory.')
+        raise OSError(f"Given leaf, {leaf_path}, is not a directory.")
     common = os.path.commonpath((root_path, leaf_path))
     if os.path.normpath(common) != os.path.normpath(root_path):
-        raise SonusAIError(f'Given leaf, {leaf_path}, is not in the hierarchy of the given root, {root_path}')
+        raise OSError(f"Given leaf, {leaf_path}, is not in the hierarchy of the given root, {root_path}")
     top_config_file = os.path.join(root_path, config_file)
     if not Path(top_config_file).is_file():
-        raise SonusAIError(f'Could not find {top_config_file}')
+        raise OSError(f"Could not find {top_config_file}")
     current = leaf_path
     config_files = []
@@ -216,24 +205,11 @@ def update_config_from_hierarchy(root: str, leaf: str, config: dict) -> dict:
     new_config = deepcopy(config)
     config_files = get_hierarchical_config_files(root=root, leaf=leaf)
     for config_file in config_files:
-        new_config = update_config_from_file(name=config_file, config=new_config)
+        new_config = update_config_from_file(filename=config_file, given_config=new_config)
     return new_config
-def get_max_class(num_classes: int, truth_mutex: bool) -> int:
-    """Get the maximum class index
-    :param num_classes: Number of classes
-    :param truth_mutex: Truth is mutex mode
-    :return: Highest class index
-    """
-    max_class = num_classes
-    if truth_mutex:
-        max_class -= 1
-    return max_class
 def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
     """Get the list of target files from a config
@@ -243,48 +219,62 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
     """
     from itertools import chain
-    from tqdm import tqdm
-    from sonusai import SonusAIError
     from sonusai.utils import dataclass_from_dict
-    from sonusai.utils import pp_tqdm_imap
-    from .datatypes import TargetFiles
+    from sonusai.utils import par_track
+    from sonusai.utils import track
-    truth_settings = config.get('truth_settings', list())
-    level_type = config.get('target_level_type', None)
-    target_files = list(chain.from_iterable([append_target_files(entry=entry,
-                                                                 truth_settings=truth_settings,
-                                                                 level_type=level_type)
-                                             for entry in config['targets']]))
+    from .datatypes import TargetFiles
-    progress = tqdm(total=len(target_files), disable=not show_progress)
-    target_files = pp_tqdm_imap(_get_num_samples, target_files, progress=progress)
+    class_indices = config["class_indices"]
+    if not isinstance(class_indices, list):
+        class_indices = [class_indices]
+    target_files = list(
+        chain.from_iterable(
+            [
+                append_target_files(
+                    entry=entry,
+                    class_indices=class_indices,
+                    truth_configs=config["truth_configs"],
+                    level_type=config["target_level_type"],
+                )
+                for entry in config["targets"]
+            ]
+        )
+    )
+    progress = track(total=len(target_files), disable=not show_progress)
+    target_files = par_track(_get_num_samples, target_files, progress=progress)
     progress.close()
-    max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
+    num_classes = config["num_classes"]
     for target_file in target_files:
-        target_file['truth_settings'] = update_truth_settings(target_file['truth_settings'], config['truth_settings'])
+        if any(class_index < 0 for class_index in target_file["class_indices"]):
+            raise ValueError("class indices must contain only positive elements")
-        for truth_setting in target_file['truth_settings']:
-            if any(idx > max_class for idx in truth_setting['index']):
-                raise SonusAIError('invalid truth index')
+        if any(class_index > num_classes for class_index in target_file["class_indices"]):
+            raise ValueError(f"class index elements must not be greater than {num_classes}")
     return dataclass_from_dict(TargetFiles, target_files)
-def append_target_files(entry: dict | str,
-                        truth_settings: list[dict],
-                        level_type: str | None,
-                        tokens: dict = None) -> list[dict]:
+def append_target_files(
+    entry: dict | str,
+    class_indices: list[int],
+    truth_configs: dict,
+    level_type: str,
+    tokens: dict | None = None,
+) -> list[dict]:
     """Process target files list and append as needed
     :param entry: Target file entry to append to the list
-    :param truth_settings: Truth settings
+    :param class_indices: Class indices
+    :param truth_configs: Truth configs
     :param level_type: Target level type
     :param tokens: Tokens used for variable expansion
     :return: List of target files
     """
+    from copy import deepcopy
     from glob import glob
     from os import listdir
     from os.path import dirname
@@ -293,8 +283,11 @@ def append_target_files(entry: dict | str,
     from os.path import join
     from os.path import splitext
-    from sonusai import SonusAIError
+    from sonusai.utils import dataclass_from_dict
     from .audio import validate_input_file
+    from .constants import REQUIRED_TRUTH_CONFIGS
+    from .datatypes import TruthConfig
     from .tokenized_shell_vars import tokenized_expand
     from .tokenized_shell_vars import tokenized_replace
@@ -302,23 +295,38 @@ def append_target_files(entry: dict | str,
         tokens = {}
     if isinstance(entry, dict):
-        if 'name' in entry:
-            in_name = entry['name']
+        if "name" in entry:
+            in_name = entry["name"]
         else:
-            raise SonusAIError('Target list contained record without name')
-        if 'truth_settings' in entry:
-            truth_settings = entry['truth_settings']
-        if 'target_level_type' in entry:
-            level_type = entry['target_level_type']
+            raise AttributeError("Target list contained record without name")
+        if "class_indices" in entry:
+            if isinstance(entry["class_indices"], list):
+                class_indices = entry["class_indices"]
+            else:
+                class_indices = [entry["class_indices"]]
+        truth_configs_override = entry.get("truth_configs", {})
+        for key in truth_configs_override:
+            if key not in truth_configs:
+                raise AttributeError(
+                    f"Truth config '{key}' override specified for {entry['name']} is not defined at top level"
+                )
+        truth_configs_merged = {}
+        for key in truth_configs_override:
+            truth_configs_merged[key] = deepcopy(truth_configs[key])
+            if truth_configs_override[key] is not None:
+                truth_configs_merged[key] |= truth_configs_override[key]
+        level_type = entry.get("level_type", level_type)
     else:
         in_name = entry
+        truth_configs_merged = deepcopy(truth_configs)
     in_name, new_tokens = tokenized_expand(in_name)
     tokens.update(new_tokens)
     names = sorted(glob(in_name))
     if not names:
-        raise SonusAIError(f'Could not find {in_name}. Make sure path exists')
+        raise OSError(f"Could not find {in_name}. Make sure path exists")
     target_files: list[dict] = []
     for name in names:
@@ -329,57 +337,81 @@ def append_target_files(entry: dict | str,
                 child = file
                 if not isabs(child):
                     child = join(dir_name, child)
-                target_files.extend(append_target_files(entry=child,
-                                                        truth_settings=truth_settings,
-                                                        level_type=level_type,
-                                                        tokens=tokens))
+                target_files.extend(
+                    append_target_files(
+                        entry=child,
+                        class_indices=class_indices,
+                        truth_configs=truth_configs_merged,
+                        level_type=level_type,
+                        tokens=tokens,
+                    )
+                )
         else:
             try:
-                if ext == '.txt':
-                    with open(file=name, mode='r') as txt_file:
+                if ext == ".txt":
+                    with open(file=name) as txt_file:
                         for line in txt_file:
                             # strip comments
-                            child = line.partition('#')[0]
+                            child = line.partition("#")[0]
                             child = child.rstrip()
                             if child:
                                 child, new_tokens = tokenized_expand(child)
                                 tokens.update(new_tokens)
                                 if not isabs(child):
                                     child = join(dir_name, child)
-                                target_files.extend(append_target_files(entry=child,
-                                                                        truth_settings=truth_settings,
-                                                                        level_type=level_type,
-                                                                        tokens=tokens))
-                elif ext == '.yml':
+                                target_files.extend(
+                                    append_target_files(
+                                        entry=child,
+                                        class_indices=class_indices,
+                                        truth_configs=truth_configs_merged,
+                                        level_type=level_type,
+                                        tokens=tokens,
+                                    )
+                                )
+                elif ext == ".yml":
                     try:
                         yml_config = raw_load_config(name)
-                        if 'targets' in yml_config:
-                            for record in yml_config['targets']:
-                                target_files.extend(append_target_files(entry=record,
-                                                                        truth_settings=truth_settings,
-                                                                        level_type=level_type,
-                                                                        tokens=tokens))
+                        if "targets" in yml_config:
+                            for record in yml_config["targets"]:
+                                target_files.extend(
+                                    append_target_files(
+                                        entry=record,
+                                        class_indices=class_indices,
+                                        truth_configs=truth_configs_merged,
+                                        level_type=level_type,
+                                        tokens=tokens,
+                                    )
+                                )
                     except Exception as e:
-                        raise SonusAIError(f'Error processing {name}: {e}')
+                        raise OSError(f"Error processing {name}: {e}") from e
                 else:
                     validate_input_file(name)
                     target_file: dict = {
-                        'expanded_name': name,
-                        'name':          tokenized_replace(name, tokens),
+                        "expanded_name": name,
+                        "name": tokenized_replace(name, tokens),
+                        "class_indices": class_indices,
+                        "level_type": level_type,
+                        "truth_configs": {},
                     }
-                    if len(truth_settings) > 0:
-                        target_file['truth_settings'] = truth_settings
-                        for truth_setting in target_file['truth_settings']:
-                            if 'function' in truth_setting and truth_setting['function'] == 'file':
-                                truth_setting['config']['file'] = splitext(target_file['name'])[0] + '.h5'
-                    if level_type is not None:
-                        target_file['level_type'] = level_type
+                    if len(truth_configs_merged) > 0:
+                        for tc_key, tc_value in truth_configs_merged.items():
+                            config = deepcopy(tc_value)
+                            truth_config: dict = {}
+                            for key in REQUIRED_TRUTH_CONFIGS:
+                                truth_config[key] = config[key]
+                                del config[key]
+                            truth_config["config"] = config
+                            target_file["truth_configs"][tc_key] = dataclass_from_dict(TruthConfig, truth_config)
+                        for tc_key in target_file["truth_configs"]:
+                            if (
+                                "function" in truth_configs_merged[tc_key]
+                                and truth_configs_merged[tc_key]["function"] == "file"
+                            ):
+                                truth_configs_merged[tc_key]["file"] = splitext(target_file["name"])[0] + ".h5"
                     target_files.append(target_file)
-            except SonusAIError:
-                raise
             except Exception as e:
-                raise SonusAIError(f'Error processing {name}: {e}')
+                raise OSError(f"Error processing {name}: {e}") from e
     return target_files
@@ -393,22 +425,22 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
     """
     from itertools import chain
-    from tqdm import tqdm
     from sonusai.utils import dataclass_from_dict
-    from sonusai.utils import pp_tqdm_imap
+    from sonusai.utils import par_track
+    from sonusai.utils import track
     from .datatypes import NoiseFiles
-    noise_files = list(chain.from_iterable([append_noise_files(entry=entry) for entry in config['noises']]))
+    noise_files = list(chain.from_iterable([append_noise_files(entry=entry) for entry in config["noises"]]))
-    progress = tqdm(total=len(noise_files), disable=not show_progress)
-    noise_files = pp_tqdm_imap(_get_num_samples, noise_files, progress=progress)
+    progress = track(total=len(noise_files), disable=not show_progress)
+    noise_files = par_track(_get_num_samples, noise_files, progress=progress)
     progress.close()
     return dataclass_from_dict(NoiseFiles, noise_files)
-def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
+def append_noise_files(entry: dict | str, tokens: dict | None = None) -> list[dict]:
     """Process noise files list and append as needed
     :param entry: Noise file entry to append to the list
@@ -423,7 +455,6 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
     from os.path import join
     from os.path import splitext
-    from sonusai import SonusAIError
     from .audio import validate_input_file
     from .tokenized_shell_vars import tokenized_expand
     from .tokenized_shell_vars import tokenized_replace
@@ -432,10 +463,10 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
         tokens = {}
     if isinstance(entry, dict):
-        if 'name' in entry:
-            in_name = entry['name']
+        if "name" in entry:
+            in_name = entry["name"]
         else:
-            raise SonusAIError('Noise list contained record without name')
+            raise AttributeError("Noise list contained record without name")
     else:
         in_name = entry
@@ -443,7 +474,7 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
     tokens.update(new_tokens)
     names = sorted(glob(in_name))
     if not names:
-        raise SonusAIError(f'Could not find {in_name}. Make sure path exists')
+        raise OSError(f"Could not find {in_name}. Make sure path exists")
     noise_files: list[dict] = []
     for name in names:
@@ -457,11 +488,11 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
                 noise_files.extend(append_noise_files(entry=child, tokens=tokens))
         else:
             try:
-                if ext == '.txt':
-                    with open(file=name, mode='r') as txt_file:
+                if ext == ".txt":
+                    with open(file=name) as txt_file:
                         for line in txt_file:
                             # strip comments
-                            child = line.partition('#')[0]
+                            child = line.partition("#")[0]
                             child = child.rstrip()
                             if child:
                                 child, new_tokens = tokenized_expand(child)
@@ -469,26 +500,24 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
                                 if not isabs(child):
                                     child = join(dir_name, child)
                                 noise_files.extend(append_noise_files(entry=child, tokens=tokens))
-                elif ext == '.yml':
+                elif ext == ".yml":
                     try:
                         yml_config = raw_load_config(name)
-                        if 'noises' in yml_config:
-                            for record in yml_config['noises']:
+                        if "noises" in yml_config:
+                            for record in yml_config["noises"]:
                                 noise_files.extend(append_noise_files(entry=record, tokens=tokens))
                     except Exception as e:
-                        raise SonusAIError(f'Error processing {name}: {e}')
+                        raise OSError(f"Error processing {name}: {e}") from e
                 else:
                     validate_input_file(name)
                     noise_file: dict = {
-                        'expanded_name': name,
-                        'name':          tokenized_replace(name, tokens),
+                        "expanded_name": name,
+                        "name": tokenized_replace(name, tokens),
                     }
                     noise_files.append(noise_file)
-            except SonusAIError:
-                raise
             except Exception as e:
-                raise SonusAIError(f'Error processing {name}: {e}')
+                raise OSError(f"Error processing {name}: {e}") from e
     return noise_files
@@ -499,13 +528,20 @@ def get_impulse_response_files(config: dict) -> ImpulseResponseFiles:
     :param config: Config dictionary
     :return: List of impulse response files
     """
-    from itertools import chain
-    return list(
-        chain.from_iterable([append_impulse_response_files(entry=entry) for entry in config['impulse_responses']]))
-def append_impulse_response_files(entry: str, tokens: dict = None) -> list[str]:
+    return [ImpulseResponseFile(entry["name"], entry["tags"]) for entry in config["impulse_responses"]]
+    # from itertools import chain
+    #
+    # return list(
+    #     chain.from_iterable(
+    #         [
+    #             append_impulse_response_files(entry=ImpulseResponseFile(entry["name"], entry["tags"]))
+    #             for entry in config["impulse_responses"]
+    #         ]
+    #     )
+    # )
+def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | None = None) -> list[str]:
     """Process impulse response files list and append as needed
     :param entry: Impulse response file entry to append to the list
@@ -520,7 +556,6 @@ def append_impulse_response_files(entry: str, tokens: dict = None) -> list[str]:
     from os.path import join
     from os.path import splitext
-    from sonusai import SonusAIError
     from .audio import validate_input_file
     from .tokenized_shell_vars import tokenized_expand
     from .tokenized_shell_vars import tokenized_replace
@@ -528,11 +563,11 @@ def append_impulse_response_files(entry: str, tokens: dict = None) -> list[str]:
     if tokens is None:
         tokens = {}
-    in_name, new_tokens = tokenized_expand(entry)
+    in_name, new_tokens = tokenized_expand(entry.file)
     tokens.update(new_tokens)
     names = sorted(glob(in_name))
     if not names:
-        raise SonusAIError(f'Could not find {in_name}. Make sure path exists')
+        raise OSError(f"Could not find {in_name}. Make sure path exists")
     impulse_response_files: list[str] = []
     for name in names:
@@ -540,41 +575,41 @@ def append_impulse_response_files(entry: str, tokens: dict = None) -> list[str]:
         dir_name = dirname(name)
         if isdir(name):
             for file in listdir(name):
-                child = file
-                if not isabs(child):
-                    child = join(dir_name, child)
+                if not isabs(file):
+                    file = join(dir_name, file)
+                child = ImpulseResponseFile(file, entry.tags)
                 impulse_response_files.extend(append_impulse_response_files(entry=child, tokens=tokens))
         else:
             try:
-                if ext == '.txt':
-                    with open(file=name, mode='r') as txt_file:
+                if ext == ".txt":
+                    with open(file=name) as txt_file:
                         for line in txt_file:
                             # strip comments
-                            child = line.partition('#')[0]
-                            child = child.rstrip()
-                            if child:
-                                child, new_tokens = tokenized_expand(child)
+                            file = line.partition("#")[0]
+                            file = file.rstrip()
+                            if file:
+                                file, new_tokens = tokenized_expand(file)
                                 tokens.update(new_tokens)
-                                if not isabs(child):
-                                    child = join(dir_name, child)
+                                if not isabs(file):
+                                    file = join(dir_name, file)
+                                child = ImpulseResponseFile(file, entry.tags)
                                 impulse_response_files.extend(append_impulse_response_files(entry=child, tokens=tokens))
-                elif ext == '.yml':
+                elif ext == ".yml":
                     try:
                         yml_config = raw_load_config(name)
-                        if 'impulse_responses' in yml_config:
-                            for record in yml_config['impulse_responses']:
+                        if "impulse_responses" in yml_config:
+                            for record in yml_config["impulse_responses"]:
                                 impulse_response_files.extend(
-                                    append_impulse_response_files(entry=record, tokens=tokens))
+                                    append_impulse_response_files(entry=record, tokens=tokens)
+                                )
                     except Exception as e:
-                        raise SonusAIError(f'Error processing {name}: {e}')
+                        raise OSError(f"Error processing {name}: {e}") from e
                 else:
                     validate_input_file(name)
                     impulse_response_files.append(tokenized_replace(name, tokens))
-            except SonusAIError:
-                raise
             except Exception as e:
-                raise SonusAIError(f'Error processing {name}: {e}')
+                raise OSError(f"Error processing {name}: {e}") from e
     return impulse_response_files
@@ -585,19 +620,51 @@ def get_spectral_masks(config: dict) -> SpectralMasks:
     :param config: Config dictionary
     :return: List of spectral masks
     """
-    from sonusai import SonusAIError
     from sonusai.utils import dataclass_from_dict
-    from .datatypes import SpectralMasks
     try:
-        return dataclass_from_dict(SpectralMasks, config['spectral_masks'])
+        return dataclass_from_dict(SpectralMasks, config["spectral_masks"])
     except Exception as e:
-        raise SonusAIError(f'Error in spectral_masks: {e}')
+        raise ValueError(f"Error in spectral_masks: {e}") from e
+def get_truth_parameters(config: dict) -> TruthParameters:
+    """Get the list of truth parameters from a config
+    :param config: Config dictionary
+    :return: List of truth parameters
+    """
+    from copy import deepcopy
+    from sonusai.mixture import truth_functions
+    from sonusai.mixture.truth_functions.datatypes import TruthFunctionConfig
+    from .constants import REQUIRED_TRUTH_CONFIGS
+    from .datatypes import TruthParameter
+    truth_parameters: TruthParameters = []
+    for name, truth_config in config["truth_configs"].items():
+        optional_config = deepcopy(truth_config)
+        for key in REQUIRED_TRUTH_CONFIGS:
+            del optional_config[key]
+        t_config = TruthFunctionConfig(
+            feature=config["feature"],
+            num_classes=config["num_classes"],
+            class_indices=[1],
+            target_gain=1,
+            config=optional_config,
+        )
+        parameters = getattr(truth_functions, truth_config["function"] + "_parameters")(t_config)
+        truth_parameters.append(TruthParameter(name, parameters))
+    return truth_parameters
 def _get_num_samples(entry: dict) -> dict:
     from .audio import get_num_samples
-    entry['samples'] = get_num_samples(entry['expanded_name'])
-    del entry['expanded_name']
+    entry["samples"] = get_num_samples(entry["expanded_name"])
+    del entry["expanded_name"]
     return entry

sonusai 0.18.9__py3-none-any.whl → 0.19.5__py3-none-any.whl

sonusai 0.18.9py3-none-any.whl → 0.19.5py3-none-any.whl