PyPI - sonusai - Versions diffs - 1.0.16__cp311-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl - Mend

sonusai 1.0.16__cp311-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150) hide show

sonusai/__init__.py +170 -0
sonusai/aawscd_probwrite.py +148 -0
sonusai/audiofe.py +481 -0
sonusai/calc_metric_spenh.py +1136 -0
sonusai/config/__init__.py +0 -0
sonusai/config/asr.py +21 -0
sonusai/config/config.py +65 -0
sonusai/config/config.yml +49 -0
sonusai/config/constants.py +53 -0
sonusai/config/ir.py +124 -0
sonusai/config/ir_delay.py +62 -0
sonusai/config/source.py +275 -0
sonusai/config/spectral_masks.py +15 -0
sonusai/config/truth.py +64 -0
sonusai/constants.py +14 -0
sonusai/data/__init__.py +0 -0
sonusai/data/silero_vad_v5.1.jit +0 -0
sonusai/data/silero_vad_v5.1.onnx +0 -0
sonusai/data/speech_ma01_01.wav +0 -0
sonusai/data/whitenoise.wav +0 -0
sonusai/datatypes.py +383 -0
sonusai/deprecated/gentcst.py +632 -0
sonusai/deprecated/plot.py +519 -0
sonusai/deprecated/tplot.py +365 -0
sonusai/doc.py +52 -0
sonusai/doc_strings/__init__.py +1 -0
sonusai/doc_strings/doc_strings.py +531 -0
sonusai/genft.py +196 -0
sonusai/genmetrics.py +183 -0
sonusai/genmix.py +199 -0
sonusai/genmixdb.py +235 -0
sonusai/ir_metric.py +551 -0
sonusai/lsdb.py +141 -0
sonusai/main.py +134 -0
sonusai/metrics/__init__.py +43 -0
sonusai/metrics/calc_audio_stats.py +42 -0
sonusai/metrics/calc_class_weights.py +90 -0
sonusai/metrics/calc_optimal_thresholds.py +73 -0
sonusai/metrics/calc_pcm.py +45 -0
sonusai/metrics/calc_pesq.py +36 -0
sonusai/metrics/calc_phase_distance.py +43 -0
sonusai/metrics/calc_sa_sdr.py +64 -0
sonusai/metrics/calc_sample_weights.py +25 -0
sonusai/metrics/calc_segsnr_f.py +82 -0
sonusai/metrics/calc_speech.py +382 -0
sonusai/metrics/calc_wer.py +71 -0
sonusai/metrics/calc_wsdr.py +57 -0
sonusai/metrics/calculate_metrics.py +395 -0
sonusai/metrics/class_summary.py +74 -0
sonusai/metrics/confusion_matrix_summary.py +75 -0
sonusai/metrics/one_hot.py +283 -0
sonusai/metrics/snr_summary.py +128 -0
sonusai/metrics_summary.py +314 -0
sonusai/mixture/__init__.py +15 -0
sonusai/mixture/audio.py +187 -0
sonusai/mixture/class_balancing.py +103 -0
sonusai/mixture/constants.py +3 -0
sonusai/mixture/data_io.py +173 -0
sonusai/mixture/db.py +169 -0
sonusai/mixture/db_datatypes.py +92 -0
sonusai/mixture/effects.py +344 -0
sonusai/mixture/feature.py +78 -0
sonusai/mixture/generation.py +1116 -0
sonusai/mixture/helpers.py +351 -0
sonusai/mixture/ir_effects.py +77 -0
sonusai/mixture/log_duration_and_sizes.py +23 -0
sonusai/mixture/mixdb.py +1857 -0
sonusai/mixture/pad_audio.py +35 -0
sonusai/mixture/resample.py +7 -0
sonusai/mixture/sox_effects.py +195 -0
sonusai/mixture/sox_help.py +650 -0
sonusai/mixture/spectral_mask.py +51 -0
sonusai/mixture/truth.py +61 -0
sonusai/mixture/truth_functions/__init__.py +45 -0
sonusai/mixture/truth_functions/crm.py +105 -0
sonusai/mixture/truth_functions/energy.py +222 -0
sonusai/mixture/truth_functions/file.py +48 -0
sonusai/mixture/truth_functions/metadata.py +24 -0
sonusai/mixture/truth_functions/metrics.py +28 -0
sonusai/mixture/truth_functions/phoneme.py +18 -0
sonusai/mixture/truth_functions/sed.py +98 -0
sonusai/mixture/truth_functions/target.py +142 -0
sonusai/mkwav.py +135 -0
sonusai/onnx_predict.py +363 -0
sonusai/parse/__init__.py +0 -0
sonusai/parse/expand.py +156 -0
sonusai/parse/parse_source_directive.py +129 -0
sonusai/parse/rand.py +214 -0
sonusai/py.typed +0 -0
sonusai/queries/__init__.py +0 -0
sonusai/queries/queries.py +239 -0
sonusai/rs.abi3.so +0 -0
sonusai/rs.pyi +1 -0
sonusai/rust/__init__.py +0 -0
sonusai/speech/__init__.py +0 -0
sonusai/speech/l2arctic.py +121 -0
sonusai/speech/librispeech.py +102 -0
sonusai/speech/mcgill.py +71 -0
sonusai/speech/textgrid.py +89 -0
sonusai/speech/timit.py +138 -0
sonusai/speech/types.py +12 -0
sonusai/speech/vctk.py +53 -0
sonusai/speech/voxceleb.py +108 -0
sonusai/utils/__init__.py +3 -0
sonusai/utils/asl_p56.py +130 -0
sonusai/utils/asr.py +91 -0
sonusai/utils/asr_functions/__init__.py +3 -0
sonusai/utils/asr_functions/aaware_whisper.py +69 -0
sonusai/utils/audio_devices.py +50 -0
sonusai/utils/braced_glob.py +50 -0
sonusai/utils/calculate_input_shape.py +26 -0
sonusai/utils/choice.py +51 -0
sonusai/utils/compress.py +25 -0
sonusai/utils/convert_string_to_number.py +6 -0
sonusai/utils/create_timestamp.py +5 -0
sonusai/utils/create_ts_name.py +14 -0
sonusai/utils/dataclass_from_dict.py +27 -0
sonusai/utils/db.py +16 -0
sonusai/utils/docstring.py +53 -0
sonusai/utils/energy_f.py +44 -0
sonusai/utils/engineering_number.py +166 -0
sonusai/utils/evaluate_random_rule.py +15 -0
sonusai/utils/get_frames_per_batch.py +2 -0
sonusai/utils/get_label_names.py +20 -0
sonusai/utils/grouper.py +6 -0
sonusai/utils/human_readable_size.py +7 -0
sonusai/utils/keyboard_interrupt.py +12 -0
sonusai/utils/load_object.py +21 -0
sonusai/utils/max_text_width.py +9 -0
sonusai/utils/model_utils.py +28 -0
sonusai/utils/numeric_conversion.py +11 -0
sonusai/utils/onnx_utils.py +155 -0
sonusai/utils/parallel.py +162 -0
sonusai/utils/path_info.py +7 -0
sonusai/utils/print_mixture_details.py +60 -0
sonusai/utils/rand.py +13 -0
sonusai/utils/ranges.py +43 -0
sonusai/utils/read_predict_data.py +32 -0
sonusai/utils/reshape.py +154 -0
sonusai/utils/seconds_to_hms.py +7 -0
sonusai/utils/stacked_complex.py +82 -0
sonusai/utils/stratified_shuffle_split.py +170 -0
sonusai/utils/tokenized_shell_vars.py +143 -0
sonusai/utils/write_audio.py +26 -0
sonusai/utils/yes_or_no.py +8 -0
sonusai/vars.py +47 -0
sonusai-1.0.16.dist-info/METADATA +56 -0
sonusai-1.0.16.dist-info/RECORD +150 -0
sonusai-1.0.16.dist-info/WHEEL +4 -0
sonusai-1.0.16.dist-info/entry_points.txt +3 -0

sonusai/parse/rand.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""
+Parse 'rand' expressions.
+"""
+import decimal
+import re
+from random import uniform
+import pyparsing as pp
+# Decimal precision (significant digits) used when a generated random value is rendered as text.
+SIGNIFICANT_DIGITS = 6
+def rand(directive: str) -> str:
+    """Evaluate the 'rand(min, max)' directive and validate its syntax.
+    :param directive: Directive to evaluate
+    :return: Text with all 'rand' directives replaced with a random value,
+                with a certain number of significant digits, or an empty string if 'text' is empty or None.
+    :raises ValueError: If the expression cannot be parsed or is malformed.
+    """
+    if not directive:
+        return directive
+    # Create a recursive grammar for correct expressions
+    expr = pp.Forward()
+    number = pp.pyparsing_common.number
+    func_name = pp.Literal("rand")
+    left_paren = pp.Literal("(").suppress()
+    right_paren = pp.Literal(")").suppress()
+    comma = pp.Literal(",").suppress()
+    # Allow whitespace around function parameters
+    rand_function = (
+        func_name
+        + left_paren
+        + pp.Optional(pp.White()).suppress()
+        + (number | expr)("min_val")
+        + pp.Optional(pp.White()).suppress()
+        + comma
+        + pp.Optional(pp.White()).suppress()
+        + (number | expr)("max_val")
+        + pp.Optional(pp.White()).suppress()
+        + right_paren
+    )
+    # Complete the recursive definition
+    expr << rand_function  # pyright: ignore [reportUnusedExpression]
+    # Define parse action for generating random values
+    def replace_with_random(tokens):
+        min_val_token = tokens["min_val"]
+        max_val_token = tokens["max_val"]
+        # Convert tokens to float, handling both direct values and strings
+        min_val = float(min_val_token)
+        max_val = float(max_val_token)
+        # Validate min/max relationship
+        if min_val > max_val:
+            raise ValueError(f"Min value ({min_val}) cannot be greater than max value ({max_val})")
+        # Generate random value
+        value = uniform(min_val, max_val)  # noqa: S311
+        decimal.getcontext().prec = SIGNIFICANT_DIGITS
+        return str(decimal.Decimal(value).normalize())
+    rand_function.setParseAction(replace_with_random)
+    # Create a validator parser for syntax checking only.
+    # This parser doesn't transform but just validates the syntax.
+    validator = pp.Forward()
+    validator_rand = (
+        func_name
+        + left_paren
+        + pp.Optional(pp.White()).suppress()
+        + (number | validator)
+        + pp.Optional(pp.White()).suppress()
+        + comma
+        + pp.Optional(pp.White()).suppress()
+        + (number | validator)
+        + pp.Optional(pp.White()).suppress()
+        + right_paren
+    )
+    validator << validator_rand  # pyright: ignore [reportUnusedExpression]
+    try:
+        # First, try to validate all 'rand' expressions without evaluating them.
+        # This helps identify structural problems before evaluation.
+        malformations = []
+        # Find all potential 'rand' expressions with or without opening/closing parentheses
+        potential_expressions = list(re.finditer(r"rand\s*(\()?(?:[^()]|\([^()]*\))*\)?", directive))
+        for match in potential_expressions:
+            expr_text = match.group(0)
+            # Check for missing opening parenthesis
+            if "rand" in expr_text and "(" not in expr_text:
+                malformations.append(f"Missing opening parenthesis in '{expr_text}'")
+                continue
+            # Check for missing closing parenthesis
+            if not expr_text.endswith(")"):
+                malformations.append(f"Missing closing parenthesis in '{expr_text}'")
+                continue
+            # Try to validate the expression structure
+            try:
+                validator.parseString(expr_text, parseAll=True)
+            except pp.ParseException:
+                # Count commas to check for parameter issues
+                param_text = expr_text[expr_text.find("(") + 1 : expr_text.rfind(")")]
+                # Track parenthesis nesting level to count commas correctly
+                nesting_level = 0
+                comma_count = 0
+                for char in param_text:
+                    if char == "(":
+                        nesting_level += 1
+                    elif char == ")":
+                        nesting_level -= 1
+                    elif char == "," and nesting_level == 0:
+                        comma_count += 1
+                if comma_count == 0:
+                    if not param_text.strip():
+                        malformations.append(f"Missing parameters in '{expr_text}' (expected 2)")
+                    else:
+                        # Check if there might be a space instead of comma
+                        if re.search(r"\d+\s+[-+]?\d+", param_text):
+                            malformations.append(f"Missing comma between parameters in '{expr_text}'")
+                        else:
+                            malformations.append(f"Too few parameters in '{expr_text}' (expected 2, got 1)")
+                elif comma_count > 1:
+                    malformations.append(f"Too many parameters in '{expr_text}' (expected 2, got {comma_count + 1})")
+                else:
+                    # There's 1 comma, so we have 2 parameters, but still a parsing error
+                    # This is likely a non-numeric parameter
+                    params = [p.strip() for p in split_params_respecting_nesting(param_text)]
+                    for i, param in enumerate(params):
+                        # Check nested 'rand' expressions for validity
+                        if "rand" in param:
+                            # Check if the nested expression is valid by recursively calling 'rand'
+                            try:
+                                # We only want to validate, not transform
+                                nested_validator = pp.Forward()
+                                nested_validator_rand = (
+                                    func_name
+                                    + left_paren
+                                    + pp.Optional(pp.White()).suppress()
+                                    + (number | nested_validator)
+                                    + pp.Optional(pp.White()).suppress()
+                                    + comma
+                                    + pp.Optional(pp.White()).suppress()
+                                    + (number | nested_validator)
+                                    + pp.Optional(pp.White()).suppress()
+                                    + right_paren
+                                )
+                                nested_validator << nested_validator_rand  # pyright: ignore [reportUnusedExpression]
+                                nested_validator.parseString(param, parseAll=True)
+                            except pp.ParseException:
+                                malformations.append(f"Invalid nested expression '{param}' in '{expr_text}'")
+                            continue
+                        # Check if the parameter is numeric
+                        if not is_numeric(param):
+                            param_name = "first" if i == 0 else "second"
+                            malformations.append(f"Non-numeric {param_name} parameter '{param}' in '{expr_text}'")
+        if malformations:
+            raise ValueError(f"Malformed rand directive: {'; '.join(malformations)}")
+        # If validation passes, try to transform
+        result = rand_function.transformString(directive)
+    except pp.ParseException as e:
+        raise ValueError(f"Invalid rand expression in '{directive}': {e!s}") from e
+    return result
+def is_numeric(text: str) -> bool:
+    """Check if the text is a valid number (including scientific notation)."""
+    numeric_pattern = r"^[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?$"
+    return bool(re.match(numeric_pattern, text))
+def split_params_respecting_nesting(param_text: str) -> list:
+    """Split parameters by comma while respecting nested parentheses."""
+    result = []
+    current_param = []
+    nesting_level = 0
+    for char in param_text:
+        if char == "(":
+            nesting_level += 1
+            current_param.append(char)
+        elif char == ")" and nesting_level > 0:
+            nesting_level -= 1
+            current_param.append(char)
+        elif char == "," and nesting_level == 0:
+            result.append("".join(current_param))
+            current_param = []
+        else:
+            current_param.append(char)
+    if current_param:
+        result.append("".join(current_param))
+    return result

sonusai/py.typed ADDED Viewed

File without changes

sonusai/queries/__init__.py ADDED Viewed

File without changes

sonusai/queries/queries.py ADDED Viewed

@@ -0,0 +1,239 @@
+from collections.abc import Callable
+from typing import Any
+from ..datatypes import GeneralizedIDs
+from ..mixture.mixdb import MixtureDatabase
+def _true_predicate(_: Any) -> bool:
+    """Default predicate: accept every value, whatever it is."""
+    return True
+def get_mixids_from_mixture_field_predicate(
+    mixdb: MixtureDatabase,
+    field: str,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixture IDs based on the mixture field and predicate
+    Return a dictionary where:
+        - keys are the matching field values
+        - values are lists of the mixids that match the criteria
+    """
+    mixid_out = mixdb.mixids_to_list(mixids)
+    if predicate is None:
+        predicate = _true_predicate
+    criteria_set = set()
+    for m_id in mixid_out:
+        value = getattr(mixdb.mixture(m_id), field)
+        if isinstance(value, dict):
+            for v in value.values():
+                if predicate(v):
+                    criteria_set.add(v)
+        elif predicate(value):
+            criteria_set.add(value)
+    criteria = sorted(criteria_set)
+    result: dict[int, list[int]] = {}
+    for criterion in criteria:
+        result[criterion] = []
+        for m_id in mixid_out:
+            value = getattr(mixdb.mixture(m_id), field)
+            if isinstance(value, dict):
+                for v in value.values():
+                    if v == criterion:
+                        result[criterion].append(m_id)
+            elif value == criterion:
+                result[criterion].append(m_id)
+    return result
+def get_mixids_from_truth_configs_field_predicate(
+    mixdb: MixtureDatabase,
+    field: str,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixture IDs based on the target truth_configs field and predicate
+    Return a dictionary where:
+        - keys are the matching field values
+        - values are lists of the mixids that match the criteria
+    """
+    from ..config.constants import REQUIRED_TRUTH_CONFIG_FIELDS
+    mixid_out = mixdb.mixids_to_list(mixids)
+    # Get all field values
+    values = get_all_truth_configs_values_from_field(mixdb, field)
+    if predicate is None:
+        predicate = _true_predicate
+    # Get only values of interest
+    values = [value for value in values if predicate(value)]
+    result = {}
+    for value in values:
+        # Get a list of sources for each field value
+        indices = []
+        for s_ids in mixdb.source_file_ids.values():
+            for s_id in s_ids:
+                source = mixdb.source_file(s_id)
+                for truth_config in source.truth_configs.values():
+                    if field in REQUIRED_TRUTH_CONFIG_FIELDS:
+                        if value in getattr(truth_config, field):
+                            indices.append(s_id)
+                    else:
+                        if value in getattr(truth_config.config, field):
+                            indices.append(s_id)
+        indices = sorted(set(indices))
+        mixids = []
+        for index in indices:
+            for m_id in mixid_out:
+                if index in [source.file_id for source in mixdb.mixture(m_id).all_sources.values()]:
+                    mixids.append(m_id)
+        mixids = sorted(set(mixids))
+        if mixids:
+            result[value] = mixids
+    return result
+def get_all_truth_configs_values_from_field(mixdb: MixtureDatabase, field: str) -> list:
+    """
+    Generate a list of all values corresponding to the given field in truth_configs
+    """
+    from ..config.constants import REQUIRED_TRUTH_CONFIG_FIELDS
+    result = []
+    for sources in mixdb.source_files.values():
+        for source in sources:
+            for truth_config in source.truth_configs.values():
+                if field in REQUIRED_TRUTH_CONFIG_FIELDS:
+                    value = getattr(truth_config, field)
+                else:
+                    value = getattr(truth_config.config, field, None)
+                if not isinstance(value, list):
+                    value = [value]
+                result.extend(value)
+    return sorted(set(result))
+def get_mixids_from_noise(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixids based on noise index predicate
+    Return a dictionary where:
+        - keys are the noise indices
+        - values are lists of the mixids that match the noise index
+    """
+    return get_mixids_from_mixture_field_predicate(mixdb=mixdb, mixids=mixids, field="noise_id", predicate=predicate)
+def get_mixids_from_source(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixids based on a source index predicate
+    Return a dictionary where:
+        - keys are the source indices
+        - values are lists of the mixids that match the source index
+    """
+    return get_mixids_from_mixture_field_predicate(mixdb=mixdb, mixids=mixids, field="source_ids", predicate=predicate)
+def get_mixids_from_snr(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[float, list[int]]:
+    """
+    Generate mixids based on an SNR predicate
+    Return a dictionary where:
+        - keys are the SNRs
+        - values are lists of the mixids that match the SNR
+    """
+    mixid_out = mixdb.mixids_to_list(mixids)
+    # Get all the SNRs
+    snrs = [float(snr) for snr in mixdb.all_snrs if not snr.is_random]
+    if predicate is None:
+        predicate = _true_predicate
+    # Get only the SNRs of interest (filter on predicate)
+    snrs = [snr for snr in snrs if predicate(snr)]
+    result: dict[float, list[int]] = {}
+    for snr in snrs:
+        # Get a list of mixids for each SNR
+        result[snr] = sorted(
+            [i for i, mixture in enumerate(mixdb.mixtures) if mixture.noise.snr == snr and i in mixid_out]
+        )
+    return result
+def get_mixids_from_class_indices(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixids based on a class index predicate
+    Return a dictionary where:
+        - keys are the class indices
+        - values are lists of the mixids that match the class index
+    """
+    mixid_out = mixdb.mixids_to_list(mixids)
+    if predicate is None:
+        predicate = _true_predicate
+    criteria_set = set()
+    for m_id in mixid_out:
+        class_indices = mixdb.mixture_class_indices(m_id)
+        for class_index in class_indices:
+            if predicate(class_index):
+                criteria_set.add(class_index)
+    criteria = sorted(criteria_set)
+    result: dict[int, list[int]] = {}
+    for criterion in criteria:
+        result[criterion] = []
+        for m_id in mixid_out:
+            class_indices = mixdb.mixture_class_indices(m_id)
+            for class_index in class_indices:
+                if class_index == criterion:
+                    result[criterion].append(m_id)
+    return result
+def get_mixids_from_truth_function(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs = "*",
+    predicate: Callable[[Any], bool] | None = None,
+) -> dict[int, list[int]]:
+    """
+    Generate mixids based on a truth function predicate
+    Return a dictionary where:
+        - keys are the truth functions
+        - values are lists of the mixids that match the truth function
+    """
+    return get_mixids_from_truth_configs_field_predicate(
+        mixdb=mixdb, mixids=mixids, field="function", predicate=predicate
+    )

sonusai/rs.abi3.so ADDED Viewed

Binary file

sonusai/rs.pyi ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__: str

sonusai/rust/__init__.py ADDED Viewed

File without changes

sonusai/speech/__init__.py ADDED Viewed

File without changes

sonusai/speech/l2arctic.py ADDED Viewed

@@ -0,0 +1,121 @@
+import os
+import string
+from pathlib import Path
+from .types import TimeAlignedType
+def _get_duration(name: str) -> float:
+    import soundfile
+    try:
+        return soundfile.info(name).duration
+    except Exception as e:
+        raise OSError(f"Error reading {name}: {e}") from e
+def load_text(audio: str | os.PathLike[str]) -> TimeAlignedType | None:
+    """Load time-aligned text data given a L2-ARCTIC audio file.
+    :param audio: Path to the L2-ARCTIC audio file.
+    :return: A TimeAlignedType object.
+    """
+    file = Path(audio).parent.parent / "transcript" / (Path(audio).stem + ".txt")
+    if not os.path.exists(file):
+        return None
+    with open(file, encoding="utf-8") as f:
+        line = f.read()
+    return TimeAlignedType(
+        0,
+        _get_duration(str(audio)),
+        line.strip().lower().translate(str.maketrans("", "", string.punctuation)),
+    )
+def load_words(audio: str | os.PathLike[str]) -> list[TimeAlignedType] | None:
+    """Load time-aligned word data given a L2-ARCTIC audio file.
+    :param audio: Path to the L2-ARCTIC audio file.
+    :return: A list of TimeAlignedType objects.
+    """
+    return _load_ta(audio, "words")
+def load_phonemes(audio: str | os.PathLike[str]) -> list[TimeAlignedType] | None:
+    """Load time-aligned phonemes data given a L2-ARCTIC audio file.
+    :param audio: Path to the L2-ARCTIC audio file.
+    :return: A list of TimeAlignedType objects.
+    """
+    return _load_ta(audio, "phones")
+def _load_ta(audio: str | os.PathLike[str], tier: str) -> list[TimeAlignedType] | None:
+    from praatio import textgrid
+    from praatio.utilities.constants import Interval
+    file = Path(audio).parent.parent / "textgrid" / (Path(audio).stem + ".TextGrid")
+    if not os.path.exists(file):
+        return None
+    tg = textgrid.openTextgrid(str(file), includeEmptyIntervals=False)
+    if tier not in tg.tierNames:
+        return None
+    entries: list[TimeAlignedType] = []
+    for entry in tg.getTier(tier).entries:
+        if isinstance(entry, Interval):
+            entries.append(TimeAlignedType(text=entry.label, start=entry.start, end=entry.end))
+    return entries
+def load_annotations(
+    audio: str | os.PathLike[str],
+) -> dict[str, list[TimeAlignedType]] | None:
+    """Load time-aligned annotation data given a L2-ARCTIC audio file.
+    :param audio: Path to the L2-ARCTIC audio file.
+    :return: A dictionary of a list of TimeAlignedType objects.
+    """
+    from praatio import textgrid
+    from praatio.utilities.constants import Interval
+    file = Path(audio).parent.parent / "annotation" / (Path(audio).stem + ".TextGrid")
+    if not os.path.exists(file):
+        return None
+    tg = textgrid.openTextgrid(str(file), includeEmptyIntervals=False)
+    result: dict[str, list[TimeAlignedType]] = {}
+    for tier in tg.tierNames:
+        entries: list[TimeAlignedType] = []
+        for entry in tg.getTier(tier).entries:
+            if isinstance(entry, Interval):
+                entries.append(TimeAlignedType(text=entry.label, start=entry.start, end=entry.end))
+        result[tier] = entries
+    return result
+def load_speakers(input_dir: Path) -> dict:
+    speakers = {}
+    with open(input_dir / "readme-download.txt") as file:
+        processing = False
+        for line in file:
+            if not processing and line.startswith("|---|"):
+                processing = True
+                continue
+            if processing:
+                if line.startswith("|**Total**|"):
+                    break
+                else:
+                    fields = line.strip().split("|")
+                    speaker_id = fields[1]
+                    gender = fields[2]
+                    dialect = fields[3]
+                    speakers[speaker_id] = {"gender": gender, "dialect": dialect}
+    return speakers

sonusai/speech/librispeech.py ADDED Viewed

@@ -0,0 +1,102 @@
+import os
+from pathlib import Path
+from .types import TimeAlignedType
+def _get_num_samples(audio: str | os.PathLike[str]) -> int:
+    """Get number of samples from audio file using soundfile
+    :param audio: Audio file name
+    :return: Number of samples
+    """
+    import soundfile
+    from pydub import AudioSegment
+    if Path(audio).suffix == ".mp3":
+        return AudioSegment.from_mp3(audio).frame_count()
+    if Path(audio).suffix == ".m4a":
+        return AudioSegment.from_file(audio).frame_count()
+    return soundfile.info(audio).frames
+def load_text(audio: str | os.PathLike[str]) -> TimeAlignedType | None:
+    """Load text data from a LibriSpeech transcription file given a LibriSpeech audio filename.
+    :param audio: Path to the LibriSpeech audio file.
+    :return: A TimeAlignedType object.
+    """
+    import string
+    from ..mixture.audio import get_sample_rate
+    path = Path(audio)
+    name = path.stem
+    transcript_filename = path.parent / f"{path.parent.parent.name}-{path.parent.name}.trans.txt"
+    if not os.path.exists(transcript_filename):
+        return None
+    with open(transcript_filename, encoding="utf-8") as f:
+        for line in f.readlines():
+            fields = line.strip().split()
+            key = fields[0]
+            if key == name:
+                text = " ".join(fields[1:]).lower().translate(str.maketrans("", "", string.punctuation))
+                return TimeAlignedType(0, _get_num_samples(audio) / get_sample_rate(str(audio)), text)
+    return None
+def load_words(audio: str | os.PathLike[str]) -> list[TimeAlignedType] | None:
+    """Load time-aligned word data given a LibriSpeech audio file.
+    :param audio: Path to the Librispeech audio file.
+    :return: A list of TimeAlignedType objects.
+    """
+    return _load_ta(audio, "words")
+def load_phonemes(audio: str | os.PathLike[str]) -> list[TimeAlignedType] | None:
+    """Load time-aligned phonemes data given a LibriSpeech audio file.
+    :param audio: Path to the LibriSpeech audio file.
+    :return: A list of TimeAlignedType objects.
+    """
+    return _load_ta(audio, "phones")
+def _load_ta(audio: str | os.PathLike[str], tier: str) -> list[TimeAlignedType] | None:
+    from praatio import textgrid
+    from praatio.utilities.constants import Interval
+    file = Path(audio).with_suffix(".TextGrid")
+    if not os.path.exists(file):
+        return None
+    tg = textgrid.openTextgrid(str(file), includeEmptyIntervals=False)
+    if tier not in tg.tierNames:
+        return None
+    entries: list[TimeAlignedType] = []
+    for entry in tg.getTier(tier).entries:
+        if isinstance(entry, Interval):
+            entries.append(TimeAlignedType(text=entry.label, start=entry.start, end=entry.end))
+        else:
+            entries.append(TimeAlignedType(text=entry.label, start=entry.time, end=entry.time))
+    return entries
+def load_speakers(input_dir: Path) -> dict:
+    speakers = {}
+    with open(input_dir / "SPEAKERS.TXT") as file:
+        for line in file:
+            if not line.startswith(";"):
+                fields = line.strip().split("|")
+                speaker_id = fields[0].strip()
+                gender = fields[1].strip()
+                speakers[speaker_id] = {"gender": gender}
+    return speakers