PyPI - sonusai - Versions diffs - 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl - Mend

sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

sonusai/__init__.py +1 -1
sonusai/aawscd_probwrite.py +1 -1
sonusai/calc_metric_spenh.py +1 -1
sonusai/genft.py +29 -14
sonusai/genmetrics.py +60 -42
sonusai/genmix.py +41 -29
sonusai/genmixdb.py +56 -64
sonusai/metrics/calc_class_weights.py +1 -3
sonusai/metrics/calc_optimal_thresholds.py +2 -2
sonusai/metrics/calc_phase_distance.py +1 -1
sonusai/metrics/calc_speech.py +6 -6
sonusai/metrics/class_summary.py +6 -15
sonusai/metrics/confusion_matrix_summary.py +11 -27
sonusai/metrics/one_hot.py +3 -3
sonusai/metrics/snr_summary.py +7 -7
sonusai/mixture/__init__.py +2 -17
sonusai/mixture/augmentation.py +5 -6
sonusai/mixture/class_count.py +1 -1
sonusai/mixture/config.py +36 -46
sonusai/mixture/data_io.py +30 -1
sonusai/mixture/datatypes.py +29 -40
sonusai/mixture/db_datatypes.py +1 -1
sonusai/mixture/feature.py +3 -23
sonusai/mixture/generation.py +161 -204
sonusai/mixture/helpers.py +29 -187
sonusai/mixture/mixdb.py +386 -159
sonusai/mixture/soundfile_audio.py +1 -1
sonusai/mixture/sox_audio.py +4 -4
sonusai/mixture/sox_augmentation.py +1 -1
sonusai/mixture/target_class_balancing.py +9 -11
sonusai/mixture/targets.py +23 -20
sonusai/mixture/torchaudio_audio.py +18 -7
sonusai/mixture/torchaudio_augmentation.py +3 -4
sonusai/mixture/truth.py +21 -34
sonusai/mixture/truth_functions/__init__.py +6 -0
sonusai/mixture/truth_functions/crm.py +51 -37
sonusai/mixture/truth_functions/energy.py +95 -50
sonusai/mixture/truth_functions/file.py +12 -8
sonusai/mixture/truth_functions/metadata.py +24 -0
sonusai/mixture/truth_functions/metrics.py +28 -0
sonusai/mixture/truth_functions/phoneme.py +4 -5
sonusai/mixture/truth_functions/sed.py +32 -23
sonusai/mixture/truth_functions/target.py +62 -29
sonusai/mkwav.py +20 -19
sonusai/queries/queries.py +9 -15
sonusai/speech/l2arctic.py +6 -2
sonusai/summarize_metric_spenh.py +1 -1
sonusai/utils/__init__.py +1 -0
sonusai/utils/asr_functions/aaware_whisper.py +1 -1
sonusai/utils/audio_devices.py +27 -18
sonusai/utils/docstring.py +6 -3
sonusai/utils/energy_f.py +5 -3
sonusai/utils/human_readable_size.py +6 -6
sonusai/utils/load_object.py +15 -0
sonusai/utils/onnx_utils.py +2 -2
sonusai/utils/print_mixture_details.py +3 -3
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/METADATA +2 -2
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/RECORD +60 -58
sonusai/mixture/truth_functions/datatypes.py +0 -37
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/WHEEL +0 -0
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/entry_points.txt +0 -0

sonusai/mixture/config.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from sonusai.mixture.datatypes import ImpulseResponseFile
-from sonusai.mixture.datatypes import ImpulseResponseFiles
-from sonusai.mixture.datatypes import NoiseFiles
-from sonusai.mixture.datatypes import SpectralMasks
-from sonusai.mixture.datatypes import TargetFiles
-from sonusai.mixture.datatypes import TruthParameters
+from sonusai.mixture.datatypes import NoiseFile
+from sonusai.mixture.datatypes import SpectralMask
+from sonusai.mixture.datatypes import TargetFile
+from sonusai.mixture.datatypes import TruthParameter
 def raw_load_config(name: str) -> dict:
@@ -210,7 +209,7 @@ def update_config_from_hierarchy(root: str, leaf: str, config: dict) -> dict:
     return new_config
-def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
+def get_target_files(config: dict, show_progress: bool = False) -> list[TargetFile]:
     """Get the list of target files from a config
     :param config: Config dictionary
@@ -223,7 +222,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
     from sonusai.utils import par_track
     from sonusai.utils import track
-    from .datatypes import TargetFiles
+    from .datatypes import TargetFile
     class_indices = config["class_indices"]
     if not isinstance(class_indices, list):
@@ -255,7 +254,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
         if any(class_index > num_classes for class_index in target_file["class_indices"]):
             raise ValueError(f"class index elements must not be greater than {num_classes}")
-    return dataclass_from_dict(TargetFiles, target_files)
+    return dataclass_from_dict(list[TargetFile], target_files)
 def append_target_files(
@@ -294,6 +293,7 @@ def append_target_files(
     if tokens is None:
         tokens = {}
+    truth_configs_merged = deepcopy(truth_configs)
     if isinstance(entry, dict):
         if "name" in entry:
             in_name = entry["name"]
@@ -312,15 +312,11 @@ def append_target_files(
                 raise AttributeError(
                     f"Truth config '{key}' override specified for {entry['name']} is not defined at top level"
                 )
-        truth_configs_merged = {}
-        for key in truth_configs_override:
-            truth_configs_merged[key] = deepcopy(truth_configs[key])
-            if truth_configs_override[key] is not None:
+            if key in truth_configs_override:
                 truth_configs_merged[key] |= truth_configs_override[key]
         level_type = entry.get("level_type", level_type)
     else:
         in_name = entry
-        truth_configs_merged = deepcopy(truth_configs)
     in_name, new_tokens = tokenized_expand(in_name)
     tokens.update(new_tokens)
@@ -416,7 +412,7 @@ def append_target_files(
     return target_files
-def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
+def get_noise_files(config: dict, show_progress: bool = False) -> list[NoiseFile]:
     """Get the list of noise files from a config
     :param config: Config dictionary
@@ -429,7 +425,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
     from sonusai.utils import par_track
     from sonusai.utils import track
-    from .datatypes import NoiseFiles
+    from .datatypes import NoiseFile
     noise_files = list(chain.from_iterable([append_noise_files(entry=entry) for entry in config["noises"]]))
@@ -437,7 +433,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
     noise_files = par_track(_get_num_samples, noise_files, progress=progress)
     progress.close()
-    return dataclass_from_dict(NoiseFiles, noise_files)
+    return dataclass_from_dict(list[NoiseFile], noise_files)
 def append_noise_files(entry: dict | str, tokens: dict | None = None) -> list[dict]:
@@ -522,26 +518,25 @@ def append_noise_files(entry: dict | str, tokens: dict | None = None) -> list[di
     return noise_files
-def get_impulse_response_files(config: dict) -> ImpulseResponseFiles:
+def get_impulse_response_files(config: dict) -> list[ImpulseResponseFile]:
     """Get the list of impulse response files from a config
     :param config: Config dictionary
     :return: List of impulse response files
     """
-    return [ImpulseResponseFile(entry["name"], entry["tags"]) for entry in config["impulse_responses"]]
-    # from itertools import chain
-    #
-    # return list(
-    #     chain.from_iterable(
-    #         [
-    #             append_impulse_response_files(entry=ImpulseResponseFile(entry["name"], entry["tags"]))
-    #             for entry in config["impulse_responses"]
-    #         ]
-    #     )
-    # )
-def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | None = None) -> list[str]:
+    from itertools import chain
+    return list(
+        chain.from_iterable(
+            [
+                append_impulse_response_files(entry=ImpulseResponseFile(entry["name"], entry.get("tags", [])))
+                for entry in config["impulse_responses"]
+            ]
+        )
+    )
+def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | None = None) -> list[ImpulseResponseFile]:
     """Process impulse response files list and append as needed
     :param entry: Impulse response file entry to append to the list
@@ -569,7 +564,7 @@ def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | Non
     if not names:
         raise OSError(f"Could not find {in_name}. Make sure path exists")
-    impulse_response_files: list[str] = []
+    impulse_response_files: list[ImpulseResponseFile] = []
     for name in names:
         ext = splitext(name)[1].lower()
         dir_name = dirname(name)
@@ -607,14 +602,14 @@ def append_impulse_response_files(entry: ImpulseResponseFile, tokens: dict | Non
                         raise OSError(f"Error processing {name}: {e}") from e
                 else:
                     validate_input_file(name)
-                    impulse_response_files.append(tokenized_replace(name, tokens))
+                    impulse_response_files.append(ImpulseResponseFile(tokenized_replace(name, tokens), entry.tags))
             except Exception as e:
                 raise OSError(f"Error processing {name}: {e}") from e
     return impulse_response_files
-def get_spectral_masks(config: dict) -> SpectralMasks:
+def get_spectral_masks(config: dict) -> list[SpectralMask]:
     """Get the list of spectral masks from a config
     :param config: Config dictionary
@@ -623,12 +618,12 @@ def get_spectral_masks(config: dict) -> SpectralMasks:
     from sonusai.utils import dataclass_from_dict
     try:
-        return dataclass_from_dict(SpectralMasks, config["spectral_masks"])
+        return dataclass_from_dict(list[SpectralMask], config["spectral_masks"])
     except Exception as e:
         raise ValueError(f"Error in spectral_masks: {e}") from e
-def get_truth_parameters(config: dict) -> TruthParameters:
+def get_truth_parameters(config: dict) -> list[TruthParameter]:
     """Get the list of truth parameters from a config
     :param config: Config dictionary
@@ -637,26 +632,21 @@ def get_truth_parameters(config: dict) -> TruthParameters:
     from copy import deepcopy
     from sonusai.mixture import truth_functions
-    from sonusai.mixture.truth_functions.datatypes import TruthFunctionConfig
     from .constants import REQUIRED_TRUTH_CONFIGS
     from .datatypes import TruthParameter
-    truth_parameters: TruthParameters = []
+    truth_parameters: list[TruthParameter] = []
     for name, truth_config in config["truth_configs"].items():
         optional_config = deepcopy(truth_config)
         for key in REQUIRED_TRUTH_CONFIGS:
             del optional_config[key]
-        t_config = TruthFunctionConfig(
-            feature=config["feature"],
-            num_classes=config["num_classes"],
-            class_indices=[1],
-            target_gain=1,
-            config=optional_config,
+        parameters = getattr(truth_functions, truth_config["function"] + "_parameters")(
+            config["feature"],
+            config["num_classes"],
+            optional_config,
         )
-        parameters = getattr(truth_functions, truth_config["function"] + "_parameters")(t_config)
         truth_parameters.append(TruthParameter(name, parameters))
     return truth_parameters

sonusai/mixture/data_io.py CHANGED Viewed

@@ -128,6 +128,22 @@ def write_pickle_data(location: str, index: str, items: list[tuple[str, Any]] |
             f.write(pickle.dumps(item[1]))
+def clear_pickle_data(location: str, index: str, items: list[str] | str) -> None:
+    """Clear mixture, target, or noise data pickle file
+    :param location: Location of the file
+    :param index: Mixture, target, or noise index
+    :param items: String(s) of data to retrieve
+    """
+    from pathlib import Path
+    if not isinstance(items, list):
+        items = [items]
+    for item in items:
+        Path(_get_pickle_name(location, index, item)).unlink(missing_ok=True)
 def read_cached_data(location: str, name: str, index: str, items: list[str] | str) -> Any:
     """Read cached data from a file
@@ -143,7 +159,7 @@ def read_cached_data(location: str, name: str, index: str, items: list[str] | st
 def write_cached_data(location: str, name: str, index: str, items: list[tuple[str, Any]] | tuple[str, Any]) -> None:
-    """Write mixture data to a file
+    """Write data to a file
     :param location: Location of the mixture database
     :param name: Data name ('mixture', 'target', or 'noise')
@@ -153,3 +169,16 @@ def write_cached_data(location: str, name: str, index: str, items: list[tuple[st
     from os.path import join
     write_pickle_data(join(location, name), index, items)
+def clear_cached_data(location: str, name: str, index: str, items: list[str] | str) -> None:
+    """Remove cached data file(s)
+    :param location: Location of the mixture database
+    :param name: Data name ('mixture', 'target', or 'noise')
+    :param index: Data index (mixture, target, or noise ID)
+    :param items: String(s) of data to clear
+    """
+    from os.path import join
+    clear_pickle_data(join(location, name), index, items)

sonusai/mixture/datatypes.py CHANGED Viewed

@@ -12,16 +12,12 @@ from dataclasses_json import DataClassJsonMixin
 from praatio.utilities.constants import Interval
 AudioT: TypeAlias = npt.NDArray[np.float32]
-AudiosT: TypeAlias = list[AudioT]
-ListAudiosT: TypeAlias = list[AudiosT]
-Truth: TypeAlias = npt.NDArray[np.float32]
+Truth: TypeAlias = Any
 TruthDict: TypeAlias = dict[str, Truth]
 Segsnr: TypeAlias = npt.NDArray[np.float32]
 AudioF: TypeAlias = npt.NDArray[np.complex64]
-AudiosF: TypeAlias = list[AudioF]
 EnergyT: TypeAlias = npt.NDArray[np.float32]
 EnergyF: TypeAlias = npt.NDArray[np.float32]
@@ -92,9 +88,6 @@ class AugmentationRule(DataClassSonusAIMixin):
     mixup: int = 1
-AugmentationRules: TypeAlias = list[AugmentationRule]
 @dataclass
 class Augmentation(DataClassSonusAIMixin):
     normalize: float | None = None
@@ -108,9 +101,6 @@ class Augmentation(DataClassSonusAIMixin):
     ir: int | None = None
-Augmentations: TypeAlias = list[Augmentation]
 @dataclass(frozen=True)
 class UniversalSNRGenerator:
     is_random: bool
@@ -159,18 +149,12 @@ class TargetFile(DataClassSonusAIMixin):
         return self.samples / SAMPLE_RATE
-TargetFiles: TypeAlias = list[TargetFile]
 @dataclass
 class AugmentedTarget(DataClassSonusAIMixin):
     target_id: int
     target_augmentation_id: int
-AugmentedTargets: TypeAlias = list[AugmentedTarget]
 @dataclass
 class NoiseFile(DataClassSonusAIMixin):
     name: str
@@ -183,7 +167,6 @@ class NoiseFile(DataClassSonusAIMixin):
         return self.samples / SAMPLE_RATE
-NoiseFiles: TypeAlias = list[NoiseFile]
 ClassCount: TypeAlias = list[int]
 GeneralizedIDs: TypeAlias = str | int | list[int] | range
@@ -191,11 +174,11 @@ GeneralizedIDs: TypeAlias = str | int | list[int] | range
 @dataclass
 class GenMixData:
-    targets: AudiosT | None = None
+    targets: list[AudioT] | None = None
     target: AudioT | None = None
     noise: AudioT | None = None
     mixture: AudioT | None = None
-    truth_t: TruthDict | None = None
+    truth_t: list[TruthDict] | None = None
     segsnr_t: Segsnr | None = None
@@ -223,9 +206,6 @@ class ImpulseResponseFile:
     tags: list[str]
-ImpulseResponseFiles: TypeAlias = list[ImpulseResponseFile]
 @dataclass(frozen=True)
 class SpectralMask(DataClassSonusAIMixin):
     f_max_width: int
@@ -235,23 +215,24 @@ class SpectralMask(DataClassSonusAIMixin):
     t_max_percent: int
-SpectralMasks: TypeAlias = list[SpectralMask]
 @dataclass(frozen=True)
 class TruthParameter(DataClassSonusAIMixin):
     name: str
-    parameters: int
-TruthParameters: TypeAlias = list[TruthParameter]
+    parameters: int | None
 @dataclass
 class Target(DataClassSonusAIMixin):
     file_id: int
     augmentation: Augmentation
-    gain: float = 1.0
+    @property
+    def gain(self) -> float:
+        # gain is used to back out the gain augmentation in order to return the target audio
+        # to its normalized level when calculating truth (if needed).
+        if self.augmentation.gain is None:
+            return 1.0
+        return round(10 ** (self.augmentation.gain / 20), ndigits=5)
 Targets: TypeAlias = list[Target]
@@ -261,14 +242,14 @@ Targets: TypeAlias = list[Target]
 class Noise(DataClassSonusAIMixin):
     file_id: int
     augmentation: Augmentation
-    offset: int = 0
 @dataclass
 class Mixture(DataClassSonusAIMixin):
     name: str
-    targets: Targets
+    targets: list[Target]
     noise: Noise
+    noise_offset: int
     samples: int
     snr: UniversalSNR
     spectral_mask_id: int
@@ -288,8 +269,16 @@ class Mixture(DataClassSonusAIMixin):
     def target_augmentations(self) -> list[Augmentation]:
         return [target.augmentation for target in self.targets]
+    @property
+    def is_noise_only(self) -> bool:
+        return self.snr < -96
+    @property
+    def is_target_only(self) -> bool:
+        return self.snr > 96
-Mixtures: TypeAlias = list[Mixture]
+    def target_gain(self, target_index: int) -> float:
+        return (self.targets[target_index].gain if not self.is_noise_only else 0) * self.target_snr_gain
 @dataclass(frozen=True)
@@ -304,7 +293,7 @@ class TransformConfig:
 @dataclass(frozen=True)
 class FeatureGeneratorConfig:
     feature_mode: str
-    truth_parameters: dict[str, int]
+    truth_parameters: dict[str, int | None]
 @dataclass(frozen=True)
@@ -328,13 +317,13 @@ class MixtureDatabaseConfig(DataClassSonusAIMixin):
     class_labels: list[str]
     class_weights_threshold: list[float]
     feature: str
-    impulse_response_files: ImpulseResponseFiles
-    mixtures: Mixtures
+    impulse_response_files: list[ImpulseResponseFile]
+    mixtures: list[Mixture]
     noise_mix_mode: str
-    noise_files: NoiseFiles
+    noise_files: list[NoiseFile]
     num_classes: int
-    spectral_masks: SpectralMasks
-    target_files: TargetFiles
+    spectral_masks: list[SpectralMask]
+    target_files: list[TargetFile]
 SpeechMetadata: TypeAlias = str | list[Interval] | None

sonusai/mixture/db_datatypes.py CHANGED Viewed

@@ -35,7 +35,7 @@ SpectralMaskRecord = namedtuple(
     ["id", "f_max_width", "f_num", "t_max_width", "t_num", "t_max_percent"],
 )
-TargetRecord = namedtuple("TargetRecord", ["id", "file_id", "augmentation", "gain"])
+TargetRecord = namedtuple("TargetRecord", ["id", "file_id", "augmentation"])
 MixtureRecord = namedtuple(
     "MixtureRecord",

sonusai/mixture/feature.py CHANGED Viewed

@@ -12,7 +12,6 @@ def get_feature_from_audio(
     :param feature_mode: Feature mode
     :return: Feature data [frames, strides, feature_parameters]
     """
-    import numpy as np
     from pyaaware import FeatureGenerator
     from .datatypes import TransformConfig
@@ -31,33 +30,14 @@ def get_feature_from_audio(
         ),
     )
-    transform_frames = audio_f.shape[0]
-    feature_frames = transform_frames // (fg.decimation * fg.step)
-    feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
-    feature_frame = 0
-    for transform_frame in range(transform_frames):
-        fg.execute(audio_f[transform_frame])
-        if fg.eof():
-            feature[feature_frame] = fg.feature()
-            feature_frame += 1
+    return fg.execute_all(audio_f)[0]
-    return feature
-def get_audio_from_feature(
-    feature: Feature,
-    feature_mode: str,
-    num_classes: int | None = 1,
-    truth_mutex: bool | None = False,
-) -> AudioT:
+def get_audio_from_feature(feature: Feature, feature_mode: str) -> AudioT:
     """Apply inverse transform to feature data to generate audio data
     :param feature: Feature data [frames, stride=1, feature_parameters]
     :param feature_mode: Feature mode
-    :param num_classes: Number of classes
-    :param truth_mutex: Whether to calculate 'other' label
     :return: Audio data [samples]
     """
     import numpy as np
@@ -75,7 +55,7 @@ def get_audio_from_feature(
     if feature.shape[1] != 1:
         raise ValueError("Strided feature data is not supported for audio extraction; stride must be 1.")
-    fg = FeatureGenerator(feature_mode=feature_mode, num_classes=num_classes, truth_mutex=truth_mutex)
+    fg = FeatureGenerator(feature_mode=feature_mode)
     feature_complex = unstack_complex(feature.squeeze())
     if feature_mode[0:1] == "h":

sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl

sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl