PyPI - sonusai - Versions diffs - 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl - Mend

sonusai 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

sonusai/calc_metric_spenh.py +265 -233
sonusai/data/silero_vad_v5.1.jit +0 -0
sonusai/data/silero_vad_v5.1.onnx +0 -0
sonusai/genft.py +1 -1
sonusai/genmetrics.py +15 -18
sonusai/genmix.py +1 -1
sonusai/genmixdb.py +32 -54
sonusai/metrics_summary.py +320 -0
sonusai/mixture/__init__.py +2 -1
sonusai/mixture/audio.py +40 -7
sonusai/mixture/generation.py +100 -121
sonusai/mixture/helpers.py +22 -7
sonusai/mixture/mixdb.py +90 -30
sonusai/mixture/torchaudio_audio.py +18 -7
sonusai/mixture/torchaudio_augmentation.py +3 -4
sonusai/mixture/truth_functions/energy.py +9 -5
sonusai/mixture/truth_functions/metrics.py +1 -1
sonusai/mkwav.py +1 -1
sonusai/onnx_predict.py +1 -1
sonusai/queries/queries.py +1 -1
sonusai/utils/asr.py +1 -1
sonusai/utils/load_object.py +8 -2
sonusai/utils/stratified_shuffle_split.py +1 -1
{sonusai-0.19.8.dist-info → sonusai-0.19.10.dist-info}/METADATA +1 -1
{sonusai-0.19.8.dist-info → sonusai-0.19.10.dist-info}/RECORD +27 -24
{sonusai-0.19.8.dist-info → sonusai-0.19.10.dist-info}/WHEEL +0 -0
{sonusai-0.19.8.dist-info → sonusai-0.19.10.dist-info}/entry_points.txt +0 -0

sonusai/mixture/audio.py CHANGED Viewed

@@ -44,44 +44,77 @@ def validate_input_file(input_filepath: str | Path) -> None:
         raise OSError(f"This installation cannot process .{ext} files")
-@lru_cache
-def get_sample_rate(name: str | Path) -> int:
+def get_sample_rate(name: str | Path, use_cache: bool = True) -> int:
     """Get sample rate from audio file
     :param name: File name
+    :param use_cache: If true, use LRU caching
     :return: Sample rate
     """
+    if use_cache:
+        return _get_sample_rate(name)
+    return _get_sample_rate.__wrapped__(name)
+@lru_cache
+def _get_sample_rate(name: str | Path) -> int:
     from .soundfile_audio import get_sample_rate
     return get_sample_rate(name)
-@lru_cache
-def read_audio(name: str | Path) -> AudioT:
+def read_audio(name: str | Path, use_cache: bool = True) -> AudioT:
     """Read audio data from a file
     :param name: File name
+    :param use_cache: If true, use LRU caching
     :return: Array of time domain audio data
     """
+    if use_cache:
+        return _read_audio(name)
+    return _read_audio.__wrapped__(name)
+@lru_cache
+def _read_audio(name: str | Path) -> AudioT:
     from .soundfile_audio import read_audio
     return read_audio(name)
-@lru_cache
-def read_ir(name: str | Path) -> ImpulseResponseData:
+def read_ir(name: str | Path, use_cache: bool = True) -> ImpulseResponseData:
     """Read impulse response data
     :param name: File name
+    :param use_cache: If true, use LRU caching
     :return: ImpulseResponseData object
     """
+    if use_cache:
+        return _read_ir(name)
+    return _read_ir.__wrapped__(name)
+@lru_cache
+def _read_ir(name: str | Path) -> ImpulseResponseData:
     from .soundfile_audio import read_ir
     return read_ir(name)
+def get_num_samples(name: str | Path, use_cache: bool = True) -> int:
+    """Get the number of samples resampled to the SonusAI sample rate in the given file
+    :param name: File name
+    :param use_cache: If true, use LRU caching
+    :return: number of samples in resampled audio
+    """
+    if use_cache:
+        return _get_num_samples(name)
+    return _get_num_samples.__wrapped__(name)
 @lru_cache
-def get_num_samples(name: str | Path) -> int:
+def _get_num_samples(name: str | Path) -> int:
     """Get the number of samples resampled to the SonusAI sample rate in the given file
     :param name: File name

sonusai/mixture/generation.py CHANGED Viewed

@@ -119,8 +119,7 @@ def initialize_db(location: str, test: bool = False) -> None:
     id INTEGER PRIMARY KEY NOT NULL,
     file_id INTEGER NOT NULL,
     augmentation TEXT NOT NULL,
-    FOREIGN KEY(file_id) REFERENCES target_file (id),
-    UNIQUE(file_id, augmentation))
+    FOREIGN KEY(file_id) REFERENCES target_file (id))
     """)
     con.execute("""
@@ -389,8 +388,7 @@ def update_mixid_width(location: str, num_mixtures: int, test: bool = False) ->
     con.close()
-def populate_mixture_table(
-    location: str,
+def generate_mixtures(
     noise_mix_mode: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
@@ -403,9 +401,8 @@ def populate_mixture_table(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Generate mixtures and populate mixture table"""
+) -> tuple[int, int, list[Mixture]]:
+    """Generate mixtures"""
     if noise_mix_mode == "exhaustive":
         func = _exhaustive_noise_mix
     elif noise_mix_mode == "non-exhaustive":
@@ -415,8 +412,7 @@ def populate_mixture_table(
     else:
         raise ValueError(f"invalid noise_mix_mode: {noise_mix_mode}")
-    used_noise_files, used_noise_samples = func(
-        location=location,
+    return func(
         augmented_targets=augmented_targets,
         target_files=target_files,
         target_augmentations=target_augmentations,
@@ -428,23 +424,76 @@ def populate_mixture_table(
         num_classes=num_classes,
         feature_step_samples=feature_step_samples,
         num_ir=num_ir,
-        test=test,
     )
-    return used_noise_files, used_noise_samples
+def populate_mixture_table(
+    location: str,
+    mixtures: list[Mixture],
+    test: bool = False,
+    logging: bool = False,
+    show_progress: bool = False,
+) -> None:
+    """Populate mixture table"""
+    from sonusai import logger
+    from sonusai.utils import track
+    from .helpers import from_mixture
+    from .helpers import from_target
+    from .mixdb import db_connection
+    con = db_connection(location=location, readonly=False, test=test)
+    # Populate target table
+    if logging:
+        logger.info("Populating target table")
+    targets: list[tuple[int, str]] = []
+    for mixture in mixtures:
+        for target in mixture.targets:
+            entry = from_target(target)
+            if entry not in targets:
+                targets.append(entry)
+    for target in track(targets, disable=not show_progress):
+        con.execute("INSERT INTO target (file_id, augmentation) VALUES (?, ?)", target)
+    # Populate mixture table
+    if logging:
+        logger.info("Populating mixture table")
+    for mixture in track(mixtures, disable=not show_progress):
+        m_id = int(mixture.name)
+        con.execute(
+            """
+            INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
+            snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (m_id + 1, *from_mixture(mixture)),
+        )
+        for target in mixture.targets:
+            target_id = con.execute(
+                """
+                SELECT target.id
+                FROM target
+                WHERE ? = target.file_id AND ? = target.augmentation
+            """,
+                from_target(target),
+            ).fetchone()[0]
+            con.execute(
+                "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
+                (m_id + 1, target_id),
+            )
+    con.commit()
+    con.close()
-def update_mixture_table(location: str, m_id: int, with_data: bool = False, test: bool = False) -> GenMixData:
+def update_mixture(mixdb: MixtureDatabase, mixture: Mixture, with_data: bool = False) -> tuple[Mixture, GenMixData]:
     """Update mixture record with name and gains"""
     from .audio import get_next_noise
     from .augmentation import apply_gain
-    from .datatypes import GenMixData
-    from .helpers import from_mixture
     from .helpers import get_target
-    from .mixdb import db_connection
-    mixdb = MixtureDatabase(location, test)
-    mixture = mixdb.mixture(m_id)
     mixture, targets_audio = _initialize_targets_audio(mixdb, mixture)
     noise_audio = _augmented_noise_audio(mixdb, mixture)
@@ -459,29 +508,8 @@ def update_mixture_table(location: str, m_id: int, with_data: bool = False, test
     mixture.name = f"{int(mixture.name):0{mixdb.mixid_width}}"
-    con = db_connection(location=location, readonly=False, test=test)
-    con.execute(
-        """
-        UPDATE mixture SET  name=?,
-                            noise_file_id=?,
-                            noise_augmentation=?,
-                            noise_offset=?,
-                            noise_snr_gain=?,
-                            random_snr=?,
-                            snr=?,
-                            samples=?,
-                            spectral_mask_id=?,
-                            spectral_mask_seed=?,
-                            target_snr_gain=?
-        WHERE ? = mixture.id
-        """,
-        (*from_mixture(mixture), m_id + 1),
-    )
-    con.commit()
-    con.close()
     if not with_data:
-        return GenMixData()
+        return mixture, GenMixData()
     # Apply SNR gains
     targets_audio = [apply_gain(audio=target_audio, gain=mixture.target_snr_gain) for target_audio in targets_audio]
@@ -491,7 +519,7 @@ def update_mixture_table(location: str, m_id: int, with_data: bool = False, test
     target_audio = get_target(mixdb, mixture, targets_audio)
     mixture_audio = target_audio + noise_audio
-    return GenMixData(
+    return mixture, GenMixData(
         mixture=mixture_audio,
         targets=targets_audio,
         target=target_audio,
@@ -511,7 +539,7 @@ def _augmented_noise_audio(mixdb: MixtureDatabase, mixture: Mixture) -> AudioT:
     audio = read_audio(noise.name)
     audio = apply_augmentation(audio, noise_augmentation)
     if noise_augmentation.ir is not None:
-        audio = apply_impulse_response(audio, read_ir(mixdb.impulse_response_file(noise_augmentation.ir)))
+        audio = apply_impulse_response(audio, read_ir(mixdb.impulse_response_file(noise_augmentation.ir)))  # pyright: ignore [reportArgumentType]
     return audio
@@ -540,7 +568,10 @@ def _initialize_targets_audio(mixdb: MixtureDatabase, mixture: Mixture) -> tuple
 def _initialize_mixture_gains(
-    mixdb: MixtureDatabase, mixture: Mixture, target_audio: AudioT, noise_audio: AudioT
+    mixdb: MixtureDatabase,
+    mixture: Mixture,
+    target_audio: AudioT,
+    noise_audio: AudioT,
 ) -> Mixture:
     import numpy as np
@@ -603,7 +634,6 @@ def _initialize_mixture_gains(
 def _exhaustive_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -615,9 +645,8 @@ def _exhaustive_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Use every noise/augmentation with every target/augmentation"""
+) -> tuple[int, int, list[Mixture]]:
+    """Use every noise/augmentation with every target/augmentation+interferences/augmentation"""
     from random import randint
     import numpy as np
@@ -643,6 +672,8 @@ def _exhaustive_noise_mix(
         )
         for mixup in mixups
     ]
+    mixtures: list[Mixture] = []
     for noise_file_id in range(len(noise_files)):
         for noise_augmentation_rule in noise_augmentations:
             noise_augmentation = augmentation_from_rule(noise_augmentation_rule, num_ir)
@@ -665,10 +696,8 @@ def _exhaustive_noise_mix(
                     for spectral_mask_id in range(len(spectral_masks)):
                         for snr in all_snrs:
-                            _insert_mixture_record(
-                                location=location,
-                                m_id=m_id,
-                                mixture=Mixture(
+                            mixtures.append(
+                                Mixture(
                                     targets=targets,
                                     name=str(m_id),
                                     noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -677,19 +706,17 @@ def _exhaustive_noise_mix(
                                     snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                                     spectral_mask_id=spectral_mask_id + 1,
                                     spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                                ),
-                                test=test,
+                                )
                             )
                             m_id += 1
                             noise_offset = int((noise_offset + target_length) % noise_length)
                             used_noise_samples += target_length
-    return used_noise_files, used_noise_samples
+    return used_noise_files, used_noise_samples, mixtures
 def _non_exhaustive_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -701,10 +728,9 @@ def _non_exhaustive_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Cycle through every target/augmentation without necessarily using all noise/augmentation combinations
-    (reduced data set).
+) -> tuple[int, int, list[Mixture]]:
+    """Cycle through every target/augmentation+interferences/augmentation without necessarily using all
+    noise/augmentation combinations (reduced data set).
     """
     from random import randint
@@ -732,6 +758,8 @@ def _non_exhaustive_noise_mix(
         )
         for mixup in mixups
     ]
+    mixtures: list[Mixture] = []
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
             targets, target_length = _get_target_info(
@@ -763,10 +791,8 @@ def _non_exhaustive_noise_mix(
                     used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
-                    _insert_mixture_record(
-                        location=location,
-                        m_id=m_id,
-                        mixture=Mixture(
+                    mixtures.append(
+                        Mixture(
                             targets=targets,
                             name=str(m_id),
                             noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -775,16 +801,14 @@ def _non_exhaustive_noise_mix(
                             snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                             spectral_mask_id=spectral_mask_id + 1,
                             spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                        ),
-                        test=test,
+                        )
                     )
                     m_id += 1
-    return len(used_noise_files), used_noise_samples
+    return len(used_noise_files), used_noise_samples, mixtures
 def _non_combinatorial_noise_mix(
-    location: str,
     augmented_targets: list[AugmentedTarget],
     target_files: list[TargetFile],
     target_augmentations: list[AugmentationRule],
@@ -796,11 +820,10 @@ def _non_combinatorial_noise_mix(
     num_classes: int,
     feature_step_samples: int,
     num_ir: int,
-    test: bool = False,
-) -> tuple[int, int]:
-    """Combine a target/augmentation with a single cut of a noise/augmentation non-exhaustively
-    (each target/augmentation does not use each noise/augmentation). Cut has random start and loop back to
-    beginning if end of noise/augmentation is reached.
+) -> tuple[int, int, list[Mixture]]:
+    """Combine a target/augmentation+interferences/augmentation with a single cut of a noise/augmentation
+    non-exhaustively (each target/augmentation+interferences/augmentation does not use each noise/augmentation).
+    Cut has random start and loop back to beginning if end of noise/augmentation is reached.
     """
     from random import choice
     from random import randint
@@ -828,6 +851,8 @@ def _non_combinatorial_noise_mix(
         )
         for mixup in mixups
     ]
+    mixtures: list[Mixture] = []
     for mixup in augmented_target_indices_for_mixups:
         for augmented_target_indices in mixup:
             targets, target_length = _get_target_info(
@@ -857,10 +882,8 @@ def _non_combinatorial_noise_mix(
                     used_noise_files.add(f"{noise_file_id}_{noise_augmentation_id}")
-                    _insert_mixture_record(
-                        location=location,
-                        m_id=m_id,
-                        mixture=Mixture(
+                    mixtures.append(
+                        Mixture(
                             targets=targets,
                             name=str(m_id),
                             noise=Noise(file_id=noise_file_id + 1, augmentation=noise_augmentation),
@@ -869,12 +892,11 @@ def _non_combinatorial_noise_mix(
                             snr=UniversalSNR(value=snr.value, is_random=snr.is_random),
                             spectral_mask_id=spectral_mask_id + 1,
                             spectral_mask_seed=randint(0, np.iinfo("i").max),  # noqa: S311
-                        ),
-                        test=test,
+                        )
                     )
                     m_id += 1
-    return len(used_noise_files), used_noise_samples
+    return len(used_noise_files), used_noise_samples, mixtures
 def _get_next_noise_indices(
@@ -973,49 +995,6 @@ def _get_target_info(
     return mixups, target_length
-def _insert_mixture_record(location: str, m_id: int, mixture: Mixture, test: bool = False) -> None:
-    from .helpers import from_mixture
-    from .helpers import from_target
-    from .mixdb import db_connection
-    con = db_connection(location=location, readonly=False, test=test)
-    # Populate target table
-    for target in mixture.targets:
-        con.execute(
-            """
-            INSERT OR IGNORE INTO target (file_id, augmentation)
-            VALUES (?, ?)
-            """,
-            from_target(target),
-        )
-    # Populate mixture table
-    con.execute(
-        """
-        INSERT INTO mixture (id, name, noise_file_id, noise_augmentation, noise_offset, noise_snr_gain, random_snr,
-        snr, samples, spectral_mask_id, spectral_mask_seed, target_snr_gain)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-        """,
-        (m_id + 1, *from_mixture(mixture)),
-    )
-    for target in mixture.targets:
-        target_id = con.execute(
-            """
-            SELECT target.id
-            FROM target
-            WHERE ? = target.file_id AND ? = target.augmentation
-        """,
-            from_target(target),
-        ).fetchone()[0]
-        con.execute(
-            "INSERT INTO mixture_target (mixture_id, target_id) VALUES (?, ?)",
-            (m_id + 1, target_id),
-        )
-    con.commit()
-    con.close()
 def get_all_snrs_from_config(config: dict) -> list[UniversalSNRGenerator]:
     from .datatypes import UniversalSNRGenerator

sonusai/mixture/helpers.py CHANGED Viewed

@@ -135,14 +135,20 @@ def mixture_all_speech_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> lis
     return results
-def mixture_metadata(mixdb: MixtureDatabase, m_id: int) -> str:
+def mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: Mixture | None = None) -> str:
     """Create a string of metadata for a Mixture
     :param mixdb: Mixture database
     :param m_id: Mixture ID
+    :param mixture: Mixture record
     :return: String of metadata
     """
-    mixture = mixdb.mixture(m_id)
+    if m_id is not None:
+        mixture = mixdb.mixture(m_id)
+    if mixture is None:
+        raise ValueError("No mixture specified.")
     metadata = ""
     speech_metadata = mixture_all_speech_metadata(mixdb, mixture)
     for mi, target in enumerate(mixture.targets):
@@ -173,17 +179,25 @@ def mixture_metadata(mixdb: MixtureDatabase, m_id: int) -> str:
     return metadata
-def write_mixture_metadata(mixdb: MixtureDatabase, m_id: int) -> None:
+def write_mixture_metadata(mixdb: MixtureDatabase, m_id: int | None = None, mixture: Mixture | None = None) -> None:
     """Write mixture metadata to a text file
     :param mixdb: Mixture database
     :param m_id: Mixture ID
+    :param mixture: Mixture record
     """
     from os.path import join
-    name = join(mixdb.location, "mixture", mixdb.mixture(m_id).name, "metadata.txt")
+    if m_id is not None:
+        name = mixdb.mixture(m_id).name
+    elif mixture is not None:
+        name = mixture.name
+    else:
+        raise ValueError("No mixture specified.")
+    name = join(mixdb.location, "mixture", name, "metadata.txt")
     with open(file=name, mode="w") as f:
-        f.write(mixture_metadata(mixdb, m_id))
+        f.write(mixture_metadata(mixdb, m_id, mixture))
 def from_mixture(
@@ -246,12 +260,13 @@ def to_target(entry: TargetRecord) -> Target:
     )
-def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT]) -> AudioT:
+def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[AudioT], use_cache: bool = True) -> AudioT:
     """Get the augmented target audio data for the given mixture record
     :param mixdb: Mixture database
     :param mixture: Mixture record
     :param targets_audio: List of augmented target audio data (one per target in the mixup)
+    :param use_cache: If true, use LRU caching
     :return: Sum of augmented target audio data
     """
     # Apply impulse responses to targets
@@ -265,7 +280,7 @@ def get_target(mixdb: MixtureDatabase, mixture: Mixture, targets_audio: list[Aud
         ir_idx = mixture.targets[idx].augmentation.ir
         if ir_idx is not None:
             targets_ir.append(
-                apply_impulse_response(audio=target, ir=read_ir(mixdb.impulse_response_file(int(ir_idx))))
+                apply_impulse_response(audio=target, ir=read_ir(mixdb.impulse_response_file(int(ir_idx)), use_cache))  # pyright: ignore [reportArgumentType]
             )
         else:
             targets_ir.append(target)

sonusai 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl

sonusai 0.19.8__py3-none-any.whl → 0.19.10__py3-none-any.whl