PyPI - sonusai - Versions diffs - 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97)

sonusai/__init__.py +16 -3
sonusai/audiofe.py +241 -77
sonusai/calc_metric_spenh.py +71 -73
sonusai/config/__init__.py +3 -0
sonusai/config/config.py +61 -0
sonusai/config/config.yml +20 -0
sonusai/config/constants.py +8 -0
sonusai/constants.py +11 -0
sonusai/data/genmixdb.yml +21 -36
sonusai/{mixture/datatypes.py → datatypes.py} +91 -130
sonusai/deprecated/plot.py +4 -5
sonusai/doc/doc.py +4 -4
sonusai/doc.py +11 -4
sonusai/genft.py +43 -45
sonusai/genmetrics.py +25 -19
sonusai/genmix.py +54 -82
sonusai/genmixdb.py +88 -264
sonusai/ir_metric.py +30 -34
sonusai/lsdb.py +41 -48
sonusai/main.py +15 -22
sonusai/metrics/calc_audio_stats.py +4 -293
sonusai/metrics/calc_class_weights.py +4 -4
sonusai/metrics/calc_optimal_thresholds.py +8 -5
sonusai/metrics/calc_pesq.py +2 -2
sonusai/metrics/calc_segsnr_f.py +4 -4
sonusai/metrics/calc_speech.py +25 -13
sonusai/metrics/class_summary.py +7 -7
sonusai/metrics/confusion_matrix_summary.py +5 -5
sonusai/metrics/one_hot.py +4 -4
sonusai/metrics/snr_summary.py +7 -7
sonusai/metrics_summary.py +38 -45
sonusai/mixture/__init__.py +4 -104
sonusai/mixture/audio.py +10 -39
sonusai/mixture/class_balancing.py +103 -0
sonusai/mixture/config.py +251 -271
sonusai/mixture/constants.py +35 -39
sonusai/mixture/data_io.py +25 -36
sonusai/mixture/db_datatypes.py +58 -22
sonusai/mixture/effects.py +386 -0
sonusai/mixture/feature.py +7 -11
sonusai/mixture/generation.py +478 -628
sonusai/mixture/helpers.py +82 -184
sonusai/mixture/ir_delay.py +3 -4
sonusai/mixture/ir_effects.py +77 -0
sonusai/mixture/log_duration_and_sizes.py +6 -12
sonusai/mixture/mixdb.py +910 -729
sonusai/mixture/pad_audio.py +35 -0
sonusai/mixture/resample.py +7 -0
sonusai/mixture/sox_effects.py +195 -0
sonusai/mixture/sox_help.py +650 -0
sonusai/mixture/spectral_mask.py +2 -2
sonusai/mixture/truth.py +17 -15
sonusai/mixture/truth_functions/crm.py +12 -12
sonusai/mixture/truth_functions/energy.py +22 -22
sonusai/mixture/truth_functions/file.py +5 -5
sonusai/mixture/truth_functions/metadata.py +4 -4
sonusai/mixture/truth_functions/metrics.py +4 -4
sonusai/mixture/truth_functions/phoneme.py +3 -3
sonusai/mixture/truth_functions/sed.py +11 -13
sonusai/mixture/truth_functions/target.py +10 -10
sonusai/mkwav.py +26 -29
sonusai/onnx_predict.py +240 -88
sonusai/queries/__init__.py +2 -2
sonusai/queries/queries.py +38 -34
sonusai/speech/librispeech.py +1 -1
sonusai/speech/mcgill.py +1 -1
sonusai/speech/timit.py +2 -2
sonusai/summarize_metric_spenh.py +10 -17
sonusai/utils/__init__.py +7 -1
sonusai/utils/asl_p56.py +2 -2
sonusai/utils/asr.py +2 -2
sonusai/utils/asr_functions/aaware_whisper.py +4 -5
sonusai/utils/choice.py +31 -0
sonusai/utils/compress.py +1 -1
sonusai/utils/dataclass_from_dict.py +19 -1
sonusai/utils/energy_f.py +3 -3
sonusai/utils/evaluate_random_rule.py +15 -0
sonusai/utils/keyboard_interrupt.py +12 -0
sonusai/utils/onnx_utils.py +3 -17
sonusai/utils/print_mixture_details.py +21 -19
sonusai/utils/{temp_seed.py → rand.py} +3 -3
sonusai/utils/read_predict_data.py +2 -2
sonusai/utils/reshape.py +3 -3
sonusai/utils/stratified_shuffle_split.py +3 -3
sonusai/{mixture → utils}/tokenized_shell_vars.py +1 -1
sonusai/utils/write_audio.py +2 -2
sonusai/vars.py +11 -4
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/METADATA +4 -2
sonusai-1.0.2.dist-info/RECORD +138 -0
sonusai/mixture/augmentation.py +0 -444
sonusai/mixture/class_count.py +0 -15
sonusai/mixture/eq_rule_is_valid.py +0 -45
sonusai/mixture/target_class_balancing.py +0 -107
sonusai/mixture/targets.py +0 -175
sonusai-0.20.3.dist-info/RECORD +0 -128
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/WHEEL +0 -0
{sonusai-0.20.3.dist-info → sonusai-1.0.2.dist-info}/entry_points.txt +0 -0

sonusai/mixture/effects.py ADDED Viewed

@@ -0,0 +1,386 @@
+# sai_rand
+# sai_rand_choice
+# sai_rand_choice_nr
+# sai_sequence
+# sai_expand
+from ..datatypes import AudioT
+from ..datatypes import Effects
+from .mixdb import MixtureDatabase
+def get_effect_rules(location: str, config: dict, test: bool = False) -> dict[str, list[Effects]]:
+    from ..datatypes import Effects
+    from ..utils.dataclass_from_dict import list_dataclass_from_dict
+    from .mixdb import MixtureDatabase
+    mixdb = MixtureDatabase(location, test)
+    rules: dict[str, list[Effects]] = {}
+    for category, source in config["sources"].items():
+        processed_rules: list[dict] = []
+        for rule in source["effects"]:
+            rule = _parse_ir_rule(rule, mixdb.num_ir_files)
+            processed_rules = _expand_effect_rules(processed_rules, rule)
+        rules[category] = list_dataclass_from_dict(list[Effects], processed_rules)
+    validate_rules(mixdb, rules)
+    return rules
+def sai_expand(text: str) -> list[str]:
+    import re
+    # search pattern
+    pattern = re.compile(r"sai_expand\((.+?)\)")
+    # initialize with input
+    expanded = [text]
+    # look for pattern
+    result = re.search(pattern, text)
+    # if found
+    if result:
+        # remove entry we are expanding
+        expanded.pop()
+        # convert match into list stripped of whitespace
+        values = result.group(1).replace(" ", "").split(",")
+        # loop over values
+        for value in values:
+            # replace pattern with value
+            replacement = re.sub(pattern, value, text, count=1)
+            # extend result with expand of replacement (for handling multiple expands in a single rule)
+            expanded.extend(sai_expand(replacement))
+    return expanded
+def _expand_effect_rules(expanded_rules: list[dict], rule: dict) -> list[dict]:
+    from copy import deepcopy
+    for key in ("pre", "post"):
+        if key in rule:
+            value = rule[key]
+            for idx in range(len(value)):
+                new_rules = sai_expand(value[idx])
+                if len(new_rules) > 1:
+                    for new_rule in new_rules:
+                        expanded_effect = deepcopy(rule)
+                        new_value = deepcopy(value)
+                        new_value[idx] = new_rule
+                        expanded_effect[key] = new_value
+                        _expand_effect_rules(expanded_rules, expanded_effect)
+                    return expanded_rules
+    expanded_rules.append(rule)
+    return expanded_rules
+def _parse_ir_rule(rule: dict, num_ir: int) -> dict:
+    from ..datatypes import EffectList
+    from .helpers import generic_ids_to_list
+    def _resolve_str(parameters: str) -> str:
+        if parameters.startswith("sai_"):
+            return f"ir {parameters}"
+        irs = generic_ids_to_list(num_ir, parameters)
+        if not all(ro in range(num_ir) for ro in irs):
+            raise ValueError(f"Invalid ir of {parameters}")
+        if len(irs) == 1:
+            return f"ir {irs[0]}"
+        return f"ir sai_expand({', '.join(map(str, irs))})"
+    def _process(rules_in: EffectList) -> EffectList:
+        rules_out: EffectList = []
+        for rule_in in rules_in:
+            parts = rule_in.split(maxsplit=1)
+            name = parts[0]
+            if name != "ir":
+                rules_out.append(rule_in)
+                continue
+            if len(parts) == 1:
+                continue
+            parameters = parts[1]
+            if parameters.isnumeric():
+                ir = int(parameters)
+                if ir not in range(num_ir):
+                    raise ValueError(f"Invalid ir of {parameters}")
+                rules_out.append(rule_in)
+                continue
+            if isinstance(parameters, str):
+                rules_out.append(_resolve_str(parameters))
+                continue
+            raise ValueError(f"Invalid ir of {parameters}")
+        return rules_out
+    for key in ("pre", "post"):
+        if key in rule:
+            rule[key] = _process(rule[key])
+    return rule
+def apply_effects(
+    mixdb: MixtureDatabase,
+    audio: AudioT,
+    effects: Effects,
+    pre: bool = True,
+    post: bool = True,
+) -> AudioT:
+    """Apply effects to audio data
+    :param mixdb: Mixture database
+    :param audio: Input audio
+    :param effects: Effects
+    :param pre: Apply pre-truth effects
+    :param post: Apply post-truth effects
+    :return: Output audio
+    """
+    from ..datatypes import EffectList
+    from .ir_effects import apply_ir
+    from .ir_effects import read_ir
+    from .sox_effects import apply_sox_effects
+    def _process(audio_in: AudioT, effects_in) -> AudioT:
+        _effects: EffectList = []
+        for effect in effects_in:
+            if effect.startswith("ir "):
+                # Apply effects gathered so far
+                audio_in = apply_sox_effects(audio_in, _effects)
+                # Then empty the effects list
+                _effects = []
+                # Apply IR
+                index = int(effect.split()[1])
+                audio_in = apply_ir(
+                    audio=audio_in,
+                    ir=read_ir(
+                        name=mixdb.ir_file(index),
+                        delay=mixdb.ir_delay(index),
+                        use_cache=mixdb.use_cache,
+                    ),
+                )
+            else:
+                _effects.append(effect)
+        return apply_sox_effects(audio_in, _effects)
+    audio_out = audio.copy()
+    if pre:
+        audio_out = _process(audio_out, effects.pre)
+    if post:
+        audio_out = _process(audio_out, effects.post)
+    return audio_out
+def estimate_effected_length(
+    samples: int,
+    effects: Effects,
+    frame_length: int = 1,
+    pre: bool = True,
+    post: bool = True,
+) -> int:
+    """Estimate effected audio length
+    :param samples: Original length in samples
+    :param effects: Effects
+    :param frame_length: Length will be a multiple of this
+    :param pre: Apply pre-truth effects
+    :param post: Apply post-truth effects
+    :return: Estimated length in samples
+    """
+    from .pad_audio import get_padded_length
+    def _update_samples(s: int, e: str) -> int:
+        import re
+        # speed factor[c]
+        speed_pattern = re.compile(r"^speed\s+(-?\d+(\.\d+)*)(c?)$")
+        result = re.search(speed_pattern, e)
+        if result:
+            value = float(result.group(1))
+            if result.group(3):
+                value = float(2 ** (value / 1200))
+            return int(s / value + 0.5)
+        # tempo [-q] [-m|-s|-l] factor [segment [search [overlap]]]
+        tempo_pattern = re.compile(r"^tempo\s+(-q\s+)?(((-m)|(-s)|(-l))\s+)?(\d+(\.\d+)*)")
+        result = re.search(tempo_pattern, e)
+        if result:
+            value = float(result.group(7))
+            return int(s / value + 0.5)
+        # other effects which do not affect length
+        return s
+    length = samples
+    if pre:
+        for effect in effects.pre:
+            length = _update_samples(length, effect)
+    if post:
+        for effect in effects.post:
+            length = _update_samples(length, effect)
+    return get_padded_length(length, frame_length)
+def evaluate_sai_random_float(text: str) -> str:
+    """Evaluate 'sai_rand(min, max)' directive
+    :param text: Text to evaluate
+    :return: Resolved rule
+    """
+    import re
+    from random import uniform
+    def rand_repl(m):
+        value = uniform(float(m.group(1)), float(m.group(4)))  # noqa: S311
+        return f"{value:.2f}"
+    rand_pattern = re.compile(r"sai_rand\(([-+]?(\d+(\.\d*)?|\.\d+)),\s*([-+]?(\d+(\.\d*)?|\.\d+))\)")
+    resolved = text
+    count = 0
+    while re.findall(rand_pattern, resolved) and count < 100:
+        try:
+            resolved = re.sub(rand_pattern, rand_repl, resolved)
+            count += 1
+        except Exception as e:
+            raise ValueError(f"Invalid rule: '{text}'.") from e
+    if count == 100:
+        raise ValueError(f"Invalid rule: '{text}'.")
+    return resolved
+def evaluate_sai_random_ir(mixdb: MixtureDatabase, text: str) -> str:
+    """Evaluate 'sai_rand' directive for ir
+    :param mixdb: Mixture database
+    :param text: Text to evaluate
+    :return: Resolved value
+    """
+    import re
+    from random import choice
+    from random import randint
+    rand_pattern = re.compile(r"^ir sai_rand$")
+    rand_range_pattern = re.compile(r"^ir sai_rand\(([-+]?\d+),\s*([-+]?\d+)\)$")
+    rand_tag_pattern = re.compile(r"^ir sai_rand\((\w+)\)$")
+    def rand_range_repl(m) -> str:
+        lower = int(m.group(1))
+        upper = int(m.group(2))
+        if (
+            lower < 0
+            or lower >= mixdb.num_ir_files
+            or upper < 0
+            or upper >= mixdb.num_ir_files
+            or lower >= upper
+            or str(lower) != m.group(1)
+            or str(upper) != m.group(2)
+        ):
+            raise ValueError(f"Invalid rule: '{text}'. Values must be integers between 0 and {mixdb.num_ir_files - 1}.")
+        return f"ir {randint(lower, upper)}"  # noqa: S311
+    def rand_tag_repl(m) -> str:
+        return m.group(1)
+    if re.match(rand_pattern, text):
+        return f"ir {randint(0, mixdb.num_ir_files)}"  # noqa: S311
+    if re.match(rand_range_pattern, text):
+        try:
+            return f"ir {eval(re.sub(rand_range_pattern, rand_range_repl, text))}"  # noqa: S307
+        except Exception as e:
+            raise ValueError(
+                f"Invalid rule: '{text}'. Values must be integers between 0 and {mixdb.num_ir_files - 1}."
+            ) from e
+    if re.match(rand_tag_pattern, text):
+        tag = re.sub(rand_tag_pattern, rand_tag_repl, text)
+        if tag in mixdb.ir_tags:
+            return f"ir {choice(mixdb.ir_file_ids_for_tag(tag))}"  # noqa: S311
+        raise ValueError(f"Invalid rule: '{text}'. Tag, '{tag}', not found in database.")
+    raise ValueError(f"Invalid rule: '{text}'.")
+def effects_from_rules(mixdb: MixtureDatabase, rules: Effects) -> Effects:
+    from copy import deepcopy
+    effects = deepcopy(rules)
+    for key in ("pre", "post"):
+        entries = getattr(effects, key)
+        for idx, entry in enumerate(entries):
+            if entry.find("sai_rand") != -1:
+                if entry.startswith("ir"):
+                    entries[idx] = evaluate_sai_random_ir(mixdb, entry)
+                else:
+                    entries[idx] = evaluate_sai_random_float(entry)
+        setattr(effects, key, entries)
+    return effects
+def conform_audio_to_length(audio: AudioT, length: int, repeat: bool, start: int) -> AudioT:
+    """Conform audio to given length
+    :param audio: Audio to conform
+    :param length: Length of output
+    :param repeat: Repeat samples or pad
+    :param start: Starting sample offset
+    :return: Conformed audio
+    """
+    import numpy as np
+    if repeat:
+        return np.take(audio, range(start, start + length), mode="wrap")
+    end = length + start
+    return np.pad(audio[start:], (0, end - len(audio)))
+def validate_rules(mixdb: MixtureDatabase, rules: dict[str, list[Effects]]) -> None:
+    from .sox_effects import validate_sox_effects
+    for rule_list in rules.values():
+        for rule in rule_list:
+            sox_effects: list[str] = []
+            effects = effects_from_rules(mixdb, rule)
+            for effect in effects.pre:
+                if not effect.startswith("ir"):
+                    sox_effects.append(effect)
+            for effect in effects.post:
+                for check in ("speed", "tempo"):
+                    if check in effect:
+                        raise ValueError(f"'{check}' effect is not allowed in post-truth effect chain.")
+                if not effect.startswith("ir"):
+                    sox_effects.append(effect)
+            validate_sox_effects(sox_effects)

sonusai/mixture/feature.py CHANGED Viewed

@@ -1,11 +1,8 @@
-from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import Feature
+from ..datatypes import AudioT
+from ..datatypes import Feature
-def get_feature_from_audio(
-    audio: AudioT,
-    feature_mode: str,
-) -> Feature:
+def get_feature_from_audio(audio: AudioT, feature_mode: str) -> Feature:
     """Apply forward transform and generate feature data from audio data
     :param audio: Time domain audio data [samples]
@@ -14,7 +11,7 @@ def get_feature_from_audio(
     """
     from pyaaware import FeatureGenerator
-    from .datatypes import TransformConfig
+    from ..datatypes import TransformConfig
     from .helpers import forward_transform
     fg = FeatureGenerator(feature_mode=feature_mode)
@@ -43,10 +40,9 @@ def get_audio_from_feature(feature: Feature, feature_mode: str) -> AudioT:
     import numpy as np
     from pyaaware import FeatureGenerator
-    from sonusai.utils.compress import power_uncompress
-    from sonusai.utils.stacked_complex import unstack_complex
-    from .datatypes import TransformConfig
+    from ..datatypes import TransformConfig
+    from ..utils.compress import power_uncompress
+    from ..utils.stacked_complex import unstack_complex
     from .helpers import inverse_transform
     if feature.ndim != 3:

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl

sonusai 0.20.3__py3-none-any.whl → 1.0.2__py3-none-any.whl