PyPI - sonusai - Versions diffs - 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl - Mend

sonusai-0.19.6-py3-none-any.whl → sonusai-0.19.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

sonusai/__init__.py +1 -1
sonusai/aawscd_probwrite.py +1 -1
sonusai/calc_metric_spenh.py +1 -1
sonusai/genft.py +29 -14
sonusai/genmetrics.py +60 -42
sonusai/genmix.py +41 -29
sonusai/genmixdb.py +56 -64
sonusai/metrics/calc_class_weights.py +1 -3
sonusai/metrics/calc_optimal_thresholds.py +2 -2
sonusai/metrics/calc_phase_distance.py +1 -1
sonusai/metrics/calc_speech.py +6 -6
sonusai/metrics/class_summary.py +6 -15
sonusai/metrics/confusion_matrix_summary.py +11 -27
sonusai/metrics/one_hot.py +3 -3
sonusai/metrics/snr_summary.py +7 -7
sonusai/mixture/__init__.py +2 -17
sonusai/mixture/augmentation.py +5 -6
sonusai/mixture/class_count.py +1 -1
sonusai/mixture/config.py +36 -46
sonusai/mixture/data_io.py +30 -1
sonusai/mixture/datatypes.py +29 -40
sonusai/mixture/db_datatypes.py +1 -1
sonusai/mixture/feature.py +3 -23
sonusai/mixture/generation.py +161 -204
sonusai/mixture/helpers.py +29 -187
sonusai/mixture/mixdb.py +386 -159
sonusai/mixture/soundfile_audio.py +1 -1
sonusai/mixture/sox_audio.py +4 -4
sonusai/mixture/sox_augmentation.py +1 -1
sonusai/mixture/target_class_balancing.py +9 -11
sonusai/mixture/targets.py +23 -20
sonusai/mixture/torchaudio_audio.py +18 -7
sonusai/mixture/torchaudio_augmentation.py +3 -4
sonusai/mixture/truth.py +21 -34
sonusai/mixture/truth_functions/__init__.py +6 -0
sonusai/mixture/truth_functions/crm.py +51 -37
sonusai/mixture/truth_functions/energy.py +95 -50
sonusai/mixture/truth_functions/file.py +12 -8
sonusai/mixture/truth_functions/metadata.py +24 -0
sonusai/mixture/truth_functions/metrics.py +28 -0
sonusai/mixture/truth_functions/phoneme.py +4 -5
sonusai/mixture/truth_functions/sed.py +32 -23
sonusai/mixture/truth_functions/target.py +62 -29
sonusai/mkwav.py +20 -19
sonusai/queries/queries.py +9 -15
sonusai/speech/l2arctic.py +6 -2
sonusai/summarize_metric_spenh.py +1 -1
sonusai/utils/__init__.py +1 -0
sonusai/utils/asr_functions/aaware_whisper.py +1 -1
sonusai/utils/audio_devices.py +27 -18
sonusai/utils/docstring.py +6 -3
sonusai/utils/energy_f.py +5 -3
sonusai/utils/human_readable_size.py +6 -6
sonusai/utils/load_object.py +15 -0
sonusai/utils/onnx_utils.py +2 -2
sonusai/utils/print_mixture_details.py +3 -3
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/METADATA +2 -2
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/RECORD +60 -58
sonusai/mixture/truth_functions/datatypes.py +0 -37
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/WHEEL +0 -0
{sonusai-0.19.6.dist-info → sonusai-0.19.9.dist-info}/entry_points.txt +0 -0

sonusai/mixture/mixdb.py CHANGED Viewed

@@ -8,27 +8,21 @@ from typing import Any
 from .datatypes import ASRConfigs
 from .datatypes import AudioF
-from .datatypes import AudiosF
-from .datatypes import AudiosT
 from .datatypes import AudioT
 from .datatypes import ClassCount
 from .datatypes import Feature
 from .datatypes import FeatureGeneratorConfig
 from .datatypes import FeatureGeneratorInfo
 from .datatypes import GeneralizedIDs
-from .datatypes import ImpulseResponseFiles
+from .datatypes import ImpulseResponseFile
 from .datatypes import MetricDoc
 from .datatypes import MetricDocs
 from .datatypes import Mixture
-from .datatypes import Mixtures
 from .datatypes import NoiseFile
-from .datatypes import NoiseFiles
 from .datatypes import Segsnr
 from .datatypes import SpectralMask
-from .datatypes import SpectralMasks
 from .datatypes import SpeechMetadata
 from .datatypes import TargetFile
-from .datatypes import TargetFiles
 from .datatypes import TransformConfig
 from .datatypes import TruthConfigs
 from .datatypes import TruthDict
@@ -46,7 +40,13 @@ def db_file(location: str, test: bool = False) -> str:
     return join(location, name)
-def db_connection(location: str, create: bool = False, readonly: bool = True, test: bool = False) -> Connection:
+def db_connection(
+    location: str,
+    create: bool = False,
+    readonly: bool = True,
+    test: bool = False,
+    verbose: bool = False,
+) -> Connection:
     import sqlite3
     from os import remove
     from os.path import exists
@@ -62,7 +62,10 @@ def db_connection(location: str, create: bool = False, readonly: bool = True, te
         name += "?mode=ro"
     connection = sqlite3.connect("file:" + name, uri=True)
-    # connection.set_trace_callback(print)
+    if verbose:
+        connection.set_trace_callback(print)
     return connection
@@ -82,8 +85,30 @@ class SQLiteContextManager:
 class MixtureDatabase:
     def __init__(self, location: str, test: bool = False) -> None:
+        import json
+        from os.path import exists
+        from .config import load_config
         self.location = location
-        self.db = partial(SQLiteContextManager, self.location, test)
+        self.test = test
+        if not exists(db_file(self.location, self.test)):
+            raise OSError(f"Could not find mixture database in {self.location}")
+        self.db = partial(SQLiteContextManager, self.location, self.test)
+        # Check config.yml to see if asr_configs has changed and update database if needed
+        config = load_config(self.location)
+        new_asr_configs = json.dumps(config["asr_configs"])
+        with self.db() as c:
+            old_asr_configs = c.execute("SELECT top.asr_configs FROM top").fetchone()
+        if old_asr_configs is not None and new_asr_configs != old_asr_configs[0]:
+            con = db_connection(location=self.location, readonly=False, test=self.test)
+            con.execute("UPDATE top SET asr_configs = ? WHERE ? = id", (new_asr_configs,))
+            con.commit()
+            con.close()
     @cached_property
     def json(self) -> str:
@@ -127,10 +152,10 @@ class MixtureDatabase:
         return get_feature_generator_info(self.fg_config)
     @cached_property
-    def truth_parameters(self) -> dict[str, int]:
+    def truth_parameters(self) -> dict[str, int | None]:
         with self.db() as c:
             rows = c.execute("SELECT * FROM truth_parameters").fetchall()
-            truth_parameters: dict[str, int] = {}
+            truth_parameters: dict[str, int | None] = {}
             for row in rows:
                 truth_parameters[row[1]] = row[2]
             return truth_parameters
@@ -197,48 +222,58 @@ class MixtureDatabase:
                     "mxssnrdbf_std",
                     "Per-bin segmental standard deviation of the dB frame values over all frames (using feature transform)",
                 ),
-                MetricDoc("Mixture Metrics", "mxpesq", "PESQ of mixture versus true target[0]"),
+                MetricDoc("Mixture Metrics", "mxpesq", "PESQ of mixture versus true targets"),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxwsdr",
-                    "Weighted signal distorion ratio of mixture versus true target[0]",
+                    "Weighted signal distortion ratio of mixture versus true targets",
                 ),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxpd",
-                    "Phase distance between mixture and true target[0]",
+                    "Phase distance between mixture and true targets",
                 ),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxstoi",
-                    "Short term objective intelligibility of mixture versus true target[0]",
+                    "Short term objective intelligibility of mixture versus true targets",
                 ),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxcsig",
-                    "Predicted rating of speech distortion of mixture versus true target[0]",
+                    "Predicted rating of speech distortion of mixture versus true targets",
                 ),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxcbak",
-                    "Predicted rating of background distortion of mixture versus true target[0]",
+                    "Predicted rating of background distortion of mixture versus true targets",
                 ),
                 MetricDoc(
                     "Mixture Metrics",
                     "mxcovl",
-                    "Predicted rating of overall quality of mixture versus true target[0]",
+                    "Predicted rating of overall quality of mixture versus true targets",
                 ),
                 MetricDoc("Mixture Metrics", "ssnr", "Segmental SNR"),
-                MetricDoc("Target Metrics", "tdco", "Target[0] DC offset"),
-                MetricDoc("Target Metrics", "tmin", "Target[0] min level"),
-                MetricDoc("Target Metrics", "tmax", "Target[0] max levl"),
-                MetricDoc("Target Metrics", "tpkdb", "Target[0] Pk lev dB"),
-                MetricDoc("Target Metrics", "tlrms", "Target[0] RMS lev dB"),
-                MetricDoc("Target Metrics", "tpkr", "Target[0] RMS Pk dB"),
-                MetricDoc("Target Metrics", "ttr", "Target[0] RMS Tr dB"),
-                MetricDoc("Target Metrics", "tcr", "Target[0] Crest factor"),
-                MetricDoc("Target Metrics", "tfl", "Target[0] Flat factor"),
-                MetricDoc("Target Metrics", "tpkc", "Target[0] Pk count"),
+                MetricDoc("Mixture Metrics", "mxtdco", "Mixture target DC offset"),
+                MetricDoc("Mixture Metrics", "mxtmin", "Mixture target min level"),
+                MetricDoc("Mixture Metrics", "mxtmax", "Mixture target max levl"),
+                MetricDoc("Mixture Metrics", "mxtpkdb", "Mixture target Pk lev dB"),
+                MetricDoc("Mixture Metrics", "mxtlrms", "Mixture target RMS lev dB"),
+                MetricDoc("Mixture Metrics", "mxtpkr", "Mixture target RMS Pk dB"),
+                MetricDoc("Mixture Metrics", "mxttr", "Mixture target RMS Tr dB"),
+                MetricDoc("Mixture Metrics", "mxtcr", "Mixture target Crest factor"),
+                MetricDoc("Mixture Metrics", "mxtfl", "Mixture target Flat factor"),
+                MetricDoc("Mixture Metrics", "mxtpkc", "Mixture target Pk count"),
+                MetricDoc("Targets Metrics", "tdco", "Targets DC offset"),
+                MetricDoc("Targets Metrics", "tmin", "Targets min level"),
+                MetricDoc("Targets Metrics", "tmax", "Targets max levl"),
+                MetricDoc("Targets Metrics", "tpkdb", "Targets Pk lev dB"),
+                MetricDoc("Targets Metrics", "tlrms", "Targets RMS lev dB"),
+                MetricDoc("Targets Metrics", "tpkr", "Targets RMS Pk dB"),
+                MetricDoc("Targets Metrics", "ttr", "Targets RMS Tr dB"),
+                MetricDoc("Targets Metrics", "tcr", "Targets Crest factor"),
+                MetricDoc("Targets Metrics", "tfl", "Targets Flat factor"),
+                MetricDoc("Targets Metrics", "tpkc", "Targets Pk count"),
                 MetricDoc("Noise Metrics", "ndco", "Noise DC offset"),
                 MetricDoc("Noise Metrics", "nmin", "Noise min level"),
                 MetricDoc("Noise Metrics", "nmax", "Noise max levl"),
@@ -272,11 +307,18 @@ class MixtureDatabase:
             ]
         )
         for name in self.asr_configs:
+            metrics.append(
+                MetricDoc(
+                    "Target Metrics",
+                    f"mxtasr.{name}",
+                    f"Mixture Target ASR text using {name} ASR as defined in mixdb asr_configs parameter",
+                )
+            )
             metrics.append(
                 MetricDoc(
                     "Target Metrics",
                     f"tasr.{name}",
-                    f"Target[0] ASR text using {name} ASR as defined in mixdb asr_configs parameter",
+                    f"Targets ASR text using {name} ASR as defined in mixdb asr_configs parameter",
                 )
             )
             metrics.append(
@@ -486,7 +528,7 @@ class MixtureDatabase:
         )
     @cached_property
-    def spectral_masks(self) -> SpectralMasks:
+    def spectral_masks(self) -> list[SpectralMask]:
         """Get spectral masks from db
         :return: Spectral masks
@@ -517,7 +559,7 @@ class MixtureDatabase:
         return _spectral_mask(self.db, sm_id)
     @cached_property
-    def target_files(self) -> TargetFiles:
+    def target_files(self) -> list[TargetFile]:
         """Get target files from db
         :return: Target files
@@ -529,17 +571,19 @@ class MixtureDatabase:
         from .db_datatypes import TargetFileRecord
         with self.db() as c:
-            target_files: TargetFiles = []
+            target_files: list[TargetFile] = []
             target_file_records = [
                 TargetFileRecord(*result) for result in c.execute("SELECT * FROM target_file").fetchall()
             ]
             for target_file_record in target_file_records:
                 truth_configs: TruthConfigs = {}
                 for truth_config_records in c.execute(
-                    "SELECT truth_config.config "
-                    + "FROM truth_config, target_file_truth_config "
-                    + "WHERE ? = target_file_truth_config.target_file_id "
-                    + "AND truth_config.id = target_file_truth_config.truth_config_id",
+                    """
+                    SELECT truth_config.config
+                    FROM truth_config, target_file_truth_config
+                    WHERE ? = target_file_truth_config.target_file_id
+                    AND truth_config.id = target_file_truth_config.truth_config_id
+                    """,
                     (target_file_record.id,),
                 ).fetchall():
                     truth_config = json.loads(truth_config_records[0])
@@ -587,7 +631,7 @@ class MixtureDatabase:
             return int(c.execute("SELECT count(target_file.id) FROM target_file").fetchone()[0])
     @cached_property
-    def noise_files(self) -> NoiseFiles:
+    def noise_files(self) -> list[NoiseFile]:
         """Get noise files from db
         :return: Noise files
@@ -625,7 +669,7 @@ class MixtureDatabase:
             return int(c.execute("SELECT count(noise_file.id) FROM noise_file").fetchone()[0])
     @cached_property
-    def impulse_response_files(self) -> ImpulseResponseFiles:
+    def impulse_response_files(self) -> list[ImpulseResponseFile]:
         """Get impulse response files from db
         :return: Impulse response files
@@ -635,10 +679,6 @@ class MixtureDatabase:
         from .datatypes import ImpulseResponseFile
         with self.db() as c:
-            # for impulse_response in c.execute(
-            #         "SELECT impulse_response_file.* FROM impulse_response_file"
-            # ).fetchall():
-            #     print(impulse_response)
             return [
                 ImpulseResponseFile(impulse_response[1], json.loads(impulse_response[2]))
                 for impulse_response in c.execute(
@@ -678,7 +718,7 @@ class MixtureDatabase:
             return int(c.execute("SELECT count(impulse_response_file.id) FROM impulse_response_file").fetchone()[0])
     @cached_property
-    def mixtures(self) -> Mixtures:
+    def mixtures(self) -> list[Mixture]:
         """Get mixtures from db
         :return: Mixtures
@@ -689,13 +729,16 @@ class MixtureDatabase:
         from .helpers import to_target
         with self.db() as c:
-            mixtures: Mixtures = []
+            mixtures: list[Mixture] = []
             for mixture in [MixtureRecord(*record) for record in c.execute("SELECT * FROM mixture").fetchall()]:
                 targets = [
                     to_target(TargetRecord(*target))
                     for target in c.execute(
-                        "SELECT target.* FROM target, mixture_target "
-                        + "WHERE ? = mixture_target.mixture_id AND target.id = mixture_target.target_id",
+                        """
+                        SELECT target.*
+                        FROM target, mixture_target
+                        WHERE ? = mixture_target.mixture_id AND target.id = mixture_target.target_id
+                        """,
                         (mixture.id,),
                     ).fetchall()
                 ]
@@ -744,7 +787,7 @@ class MixtureDatabase:
             return int(c.execute("SELECT count(mixture.id) FROM mixture").fetchone()[0])
     def read_mixture_data(self, m_id: int, items: list[str] | str) -> Any:
-        """Read mixture data from a mixture HDF5 file
+        """Read mixture data
         :param m_id: Zero-based mixture ID
         :param items: String(s) of dataset(s) to retrieve
@@ -792,7 +835,7 @@ class MixtureDatabase:
             class_indices.extend(self.target_file(t_id).class_indices)
         return sorted(set(class_indices))
-    def mixture_targets(self, m_id: int, force: bool = False) -> AudiosT:
+    def mixture_targets(self, m_id: int, force: bool = False) -> list[AudioT]:
         """Get the list of augmented target audio data (one per target in the mixup) for the given mixture ID
         :param m_id: Zero-based mixture ID
@@ -826,7 +869,7 @@ class MixtureDatabase:
         return targets_audio
-    def mixture_targets_f(self, m_id: int, targets: AudiosT | None = None, force: bool = False) -> AudiosF:
+    def mixture_targets_f(self, m_id: int, targets: list[AudioT] | None = None, force: bool = False) -> list[AudioF]:
         """Get the list of augmented target transform data (one per target in the mixup) for the given mixture ID
         :param m_id: Zero-based mixture ID
@@ -841,7 +884,7 @@ class MixtureDatabase:
         return [forward_transform(target, self.ft_config) for target in targets]
-    def mixture_target(self, m_id: int, targets: AudiosT | None = None, force: bool = False) -> AudioT:
+    def mixture_target(self, m_id: int, targets: list[AudioT] | None = None, force: bool = False) -> AudioT:
         """Get the augmented target audio data for the given mixture ID
         :param m_id: Zero-based mixture ID
@@ -864,7 +907,7 @@ class MixtureDatabase:
     def mixture_target_f(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         force: bool = False,
     ) -> AudioF:
@@ -900,7 +943,7 @@ class MixtureDatabase:
         mixture = self.mixture(m_id)
         noise = self.augmented_noise_audio(mixture)
-        noise = get_next_noise(audio=noise, offset=mixture.noise.offset, length=mixture.samples)
+        noise = get_next_noise(audio=noise, offset=mixture.noise_offset, length=mixture.samples)
         return apply_gain(audio=noise, gain=mixture.noise_snr_gain)
     def mixture_noise_f(self, m_id: int, noise: AudioT | None = None, force: bool = False) -> AudioF:
@@ -921,7 +964,7 @@ class MixtureDatabase:
     def mixture_mixture(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         noise: AudioT | None = None,
         force: bool = False,
@@ -951,7 +994,7 @@ class MixtureDatabase:
     def mixture_mixture_f(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         noise: AudioT | None = None,
         mixture: AudioT | None = None,
@@ -988,11 +1031,11 @@ class MixtureDatabase:
     def mixture_truth_t(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         noise: AudioT | None = None,
         mixture: AudioT | None = None,
         force: bool = False,
-    ) -> TruthDict:
+    ) -> list[TruthDict]:
         """Get the truth_t data for the given mixture ID
         :param m_id: Zero-based mixture ID
@@ -1000,9 +1043,9 @@ class MixtureDatabase:
         :param noise: Augmented noise audio data for the given mixture ID
         :param mixture: Mixture audio data for the given mixture ID
         :param force: Force computing data from original sources regardless of whether cached data exists
-        :return: truth_t data
+        :return: list of truth_t data
         """
-        from .helpers import get_truth
+        from .truth import truth_function
         if not force:
             truth_t = self.read_mixture_data(m_id, "truth_t")
@@ -1018,12 +1061,18 @@ class MixtureDatabase:
         if force or mixture is None:
             mixture = self.mixture_mixture(m_id, targets=targets, noise=noise, force=force)
-        return get_truth(self, self.mixture(m_id), targets, noise, mixture)
+        if not all(len(target) == self.mixture(m_id).samples for target in targets):
+            raise ValueError("Lengths of targets do not match length of mixture")
+        if len(noise) != self.mixture(m_id).samples:
+            raise ValueError("Length of noise does not match length of mixture")
+        return truth_function(self, m_id)
     def mixture_segsnr_t(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         noise: AudioT | None = None,
         force: bool = False,
@@ -1037,7 +1086,9 @@ class MixtureDatabase:
         :param force: Force computing data from original sources regardless of whether cached data exists
         :return: segsnr_t data
         """
-        from .helpers import get_segsnr_t
+        import numpy as np
+        import torch
+        from pyaaware import ForwardTransform
         if not force:
             segsnr_t = self.read_mixture_data(m_id, "segsnr_t")
@@ -1050,13 +1101,45 @@ class MixtureDatabase:
         if force or noise is None:
             noise = self.mixture_noise(m_id, force)
-        return get_segsnr_t(self, self.mixture(m_id), target, noise)
+        ft = ForwardTransform(
+            length=self.ft_config.length,
+            overlap=self.ft_config.overlap,
+            bin_start=self.ft_config.bin_start,
+            bin_end=self.ft_config.bin_end,
+            ttype=self.ft_config.ttype,
+        )
+        mixture = self.mixture(m_id)
+        segsnr_t = np.empty(mixture.samples, dtype=np.float32)
+        target_energy = ft.execute_all(torch.from_numpy(target))[1].numpy()
+        noise_energy = ft.execute_all(torch.from_numpy(noise))[1].numpy()
+        offsets = range(0, mixture.samples, self.ft_config.overlap)
+        if len(target_energy) != len(offsets):
+            raise ValueError(
+                f"Number of frames in energy, {len(target_energy)},"
+                f" is not number of frames in mixture, {len(offsets)}"
+            )
+        for idx, offset in enumerate(offsets):
+            indices = slice(offset, offset + self.ft_config.overlap)
+            if noise_energy[idx] == 0:
+                snr = np.float32(np.inf)
+            else:
+                snr = np.float32(target_energy[idx] / noise_energy[idx])
+            segsnr_t[indices] = snr
+        return segsnr_t
     def mixture_segsnr(
         self,
         m_id: int,
         segsnr_t: Segsnr | None = None,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         noise: AudioT | None = None,
         force: bool = False,
@@ -1088,12 +1171,12 @@ class MixtureDatabase:
     def mixture_ft(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         target: AudioT | None = None,
         noise: AudioT | None = None,
         mixture_f: AudioF | None = None,
         mixture: AudioT | None = None,
-        truth_t: TruthDict | None = None,
+        truth_t: list[TruthDict] | None = None,
         force: bool = False,
     ) -> tuple[Feature, TruthDict]:
         """Get the feature and truth_f data for the given mixture ID
@@ -1132,19 +1215,24 @@ class MixtureDatabase:
         fg = FeatureGenerator(self.fg_config.feature_mode, self.fg_config.truth_parameters)
-        feature, truth_f = fg.execute_all(mixture_f, truth_t)
-        for key in self.truth_configs:
-            truth_f[key] = truth_stride_reduction(truth_f[key], self.truth_configs[key].stride_reduction)
+        # TODO: handle mixup in truth_t
+        feature, truth_f = fg.execute_all(mixture_f, truth_t[0])
+        if truth_f is not None:
+            for key in self.truth_configs:
+                if self.truth_parameters[key] is not None:
+                    truth_f[key] = truth_stride_reduction(truth_f[key], self.truth_configs[key].stride_reduction)
+        else:
+            raise TypeError("Unexpected truth of None from feature generator")
         return feature, truth_f
     def mixture_feature(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         noise: AudioT | None = None,
         mixture: AudioT | None = None,
-        truth_t: TruthDict | None = None,
+        truth_t: list[TruthDict] | None = None,
         force: bool = False,
     ) -> Feature:
         """Get the feature data for the given mixture ID
@@ -1170,10 +1258,10 @@ class MixtureDatabase:
     def mixture_truth_f(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         noise: AudioT | None = None,
         mixture: AudioT | None = None,
-        truth_t: TruthDict | None = None,
+        truth_t: list[TruthDict] | None = None,
         force: bool = False,
     ) -> TruthDict:
         """Get the truth_f data for the given mixture ID
@@ -1199,9 +1287,9 @@ class MixtureDatabase:
     def mixture_class_count(
         self,
         m_id: int,
-        targets: AudiosT | None = None,
+        targets: list[AudioT] | None = None,
         noise: AudioT | None = None,
-        truth_t: TruthDict | None = None,
+        truth_t: list[TruthDict] | None = None,
     ) -> ClassCount:
         """Compute the number of frames for which each class index is active for the given mixture ID
@@ -1220,7 +1308,8 @@ class MixtureDatabase:
         num_classes = self.num_classes
         if "sed" in self.truth_configs:
             for cl in range(num_classes):
-                class_count[cl] = int(np.sum(truth_t["sed"][:, cl] >= self.class_weights_thresholds[cl]))
+                # TODO: handle mixup in truth_t
+                class_count[cl] = int(np.sum(truth_t[0]["sed"][:, cl] >= self.class_weights_thresholds[cl]))
         return class_count
@@ -1300,7 +1389,12 @@ class MixtureDatabase:
         return results
-    def mixids_for_speech_metadata(self, tier: str, value: str | None = None, where: str | None = None) -> list[int]:
+    def mixids_for_speech_metadata(
+        self,
+        tier: str | None = None,
+        value: str | None = None,
+        where: str | None = None,
+    ) -> list[int]:
         """Get a list of mixture IDs for the given speech metadata tier.
         If 'where' is None, then include mixture IDs whose tier values are equal to the given 'value'.
@@ -1310,35 +1404,35 @@ class MixtureDatabase:
         Examples:
         >>> mixdb = MixtureDatabase('/mixdb_location')
-        >>> mixids = mixdb.mixids_for_speech_metadata('speaker_id', 'TIMIT_ARC0')
-        Get mixutre IDs for mixtures with speakers whose speaker_ids are 'TIMIT_ARC0'.
+        >>> mixids = mixdb.mixids_for_speech_metadata('speaker_id', 'TIMIT_ABW0')
+        Get mixture IDs for mixtures with speakers whose speaker_ids are 'TIMIT_ABW0'.
-        >>> mixids = mixdb.mixids_for_speech_metadata('age', where='age < 25')
-        Get mixture IDs for mixtures with speakers whose ages are less than 25.
+        >>> mixids = mixdb.mixids_for_speech_metadata(where='age >= 27')
+        Get mixture IDs for mixtures with speakers whose ages are greater than or equal to 27.
-        >>> mixids = mixdb.mixids_for_speech_metadata('dialect', where="dialect in ('New York City', 'Northern')")
+        >>> mixids = mixdb.mixids_for_speech_metadata(where="dialect in ('New York City', 'Northern')")
         Get mixture IDs for mixtures with speakers whose dialects are either 'New York City' or 'Northern'.
         """
         if value is None and where is None:
             raise ValueError("Must provide either value or where")
         if where is None:
+            if tier is None:
+                raise ValueError("Must provide tier")
             where = f"{tier} = '{value}'"
-        if tier in self.textgrid_metadata_tiers:
+        if tier is not None and tier in self.textgrid_metadata_tiers:
             raise ValueError(f"TextGrid tier data, '{tier}', is not supported in mixids_for_speech_metadata().")
         with self.db() as c:
-            speaker_ids = [
-                speaker_id[0] for speaker_id in c.execute(f"SELECT id FROM speaker WHERE {where}").fetchall()
-            ]
-            results = c.execute(
-                "SELECT id FROM target_file " + f"WHERE speaker_id IN ({','.join(map(str, speaker_ids))})"
-            ).fetchall()
-            target_file_ids = [target_file_id[0] for target_file_id in results]
+            results = c.execute(f"SELECT id FROM speaker WHERE {where}").fetchall()
+            speaker_ids = ",".join(map(str, [i[0] for i in results]))
+            results = c.execute(f"SELECT id FROM target_file WHERE speaker_id IN ({speaker_ids})").fetchall()
+            target_file_ids = ",".join(map(str, [i[0] for i in results]))
             results = c.execute(
-                "SELECT mixture_id FROM mixture_target "
-                + f"WHERE mixture_target.target_id IN ({','.join(map(str, target_file_ids))})"
+                f"SELECT mixture_id FROM mixture_target WHERE mixture_target.target_id IN ({target_file_ids})"
             ).fetchall()
         return [mixture_id[0] - 1 for mixture_id in results]
@@ -1348,9 +1442,29 @@ class MixtureDatabase:
         return mixture_all_speech_metadata(self, self.mixture(m_id))
-    def mixture_metrics(
-        self, m_id: int, metrics: list[str], force: bool = False
-    ) -> list[float | int | str | Segsnr | None]:
+    def cached_metrics(self, m_ids: GeneralizedIDs = "*") -> list[str]:
+        """Get list of cached metrics for all mixtures."""
+        from glob import glob
+        from os.path import join
+        from pathlib import Path
+        supported_metrics = self.supported_metrics.names
+        first = True
+        result: set[str] = set()
+        for m_id in self.mixids_to_list(m_ids):
+            mixture_dir = join(self.location, "mixture", self.mixture(m_id).name)
+            found = {Path(f).stem for f in glob(join(mixture_dir, "*.pkl"))}
+            if first:
+                first = False
+                for f in found:
+                    if f in supported_metrics:
+                        result.add(f)
+            else:
+                result = result & found
+        return sorted(result)
+    def mixture_metrics(self, m_id: int, metrics: list[str], force: bool = False) -> list[Any]:
         """Get metrics data for the given mixture ID
         :param m_id: Zero-based mixture ID
@@ -1375,10 +1489,23 @@ class MixtureDatabase:
         from sonusai.mixture import SpeechMetrics
         from sonusai.utils import calc_asr
-        def create_target_audio() -> Callable[[], np.ndarray]:
-            state = None
+        def create_targets_audio() -> Callable[[], list[AudioT]]:
+            state: list[AudioT] | None = None
+            def get() -> list[AudioT]:
+                nonlocal state
+                if state is None:
+                    state = self.mixture_targets(m_id)
+                return state
+            return get
+        targets_audio = create_targets_audio()
+        def create_target_audio() -> Callable[[], AudioT]:
+            state: AudioT | None = None
-            def get() -> np.ndarray:
+            def get() -> AudioT:
                 nonlocal state
                 if state is None:
                     state = self.mixture_target(m_id)
@@ -1388,10 +1515,10 @@ class MixtureDatabase:
         target_audio = create_target_audio()
-        def create_target_f() -> Callable[[], np.ndarray]:
-            state = None
+        def create_target_f() -> Callable[[], AudioF]:
+            state: AudioF | None = None
-            def get() -> np.ndarray:
+            def get() -> AudioF:
                 nonlocal state
                 if state is None:
                     state = self.mixture_targets_f(m_id)[0]
@@ -1401,10 +1528,10 @@ class MixtureDatabase:
         target_f = create_target_f()
-        def create_noise_audio() -> Callable[[], np.ndarray]:
-            state = None
+        def create_noise_audio() -> Callable[[], AudioT]:
+            state: AudioT | None = None
-            def get() -> np.ndarray:
+            def get() -> AudioT:
                 nonlocal state
                 if state is None:
                     state = self.mixture_noise(m_id)
@@ -1414,10 +1541,10 @@ class MixtureDatabase:
         noise_audio = create_noise_audio()
-        def create_noise_f() -> Callable[[], np.ndarray]:
-            state = None
+        def create_noise_f() -> Callable[[], AudioF]:
+            state: AudioF | None = None
-            def get() -> np.ndarray:
+            def get() -> AudioF:
                 nonlocal state
                 if state is None:
                     state = self.mixture_noise_f(m_id)
@@ -1427,10 +1554,10 @@ class MixtureDatabase:
         noise_f = create_noise_f()
-        def create_mixture_audio() -> Callable[[], np.ndarray]:
-            state = None
+        def create_mixture_audio() -> Callable[[], AudioT]:
+            state: AudioT | None = None
-            def get() -> np.ndarray:
+            def get() -> AudioT:
                 nonlocal state
                 if state is None:
                     state = self.mixture_mixture(m_id)
@@ -1440,10 +1567,10 @@ class MixtureDatabase:
         mixture_audio = create_mixture_audio()
-        def create_segsnr_f() -> Callable[[], np.ndarray]:
-            state = None
+        def create_segsnr_f() -> Callable[[], Segsnr]:
+            state: Segsnr | None = None
-            def get() -> np.ndarray:
+            def get() -> Segsnr:
                 nonlocal state
                 if state is None:
                     state = self.mixture_segsnr(m_id)
@@ -1453,21 +1580,38 @@ class MixtureDatabase:
         segsnr_f = create_segsnr_f()
-        def create_speech() -> Callable[[], SpeechMetrics]:
-            state = None
+        def create_speech() -> Callable[[], list[SpeechMetrics]]:
+            state: list[SpeechMetrics] | None = None
-            def get() -> SpeechMetrics:
+            def get() -> list[SpeechMetrics]:
                 nonlocal state
                 if state is None:
-                    state = calc_speech(hypothesis=mixture_audio(), reference=target_audio())
+                    state = []
+                    for audio in targets_audio():
+                        state.append(calc_speech(hypothesis=mixture_audio(), reference=audio))
                 return state
             return get
         speech = create_speech()
+        def create_targets_stats() -> Callable[[], list[AudioStatsMetrics]]:
+            state: list[AudioStatsMetrics] | None = None
+            def get() -> list[AudioStatsMetrics]:
+                nonlocal state
+                if state is None:
+                    state = []
+                    for audio in targets_audio():
+                        state.append(calc_audio_stats(audio, self.fg_info.ft_config.length / SAMPLE_RATE))
+                return state
+            return get
+        targets_stats = create_targets_stats()
         def create_target_stats() -> Callable[[], AudioStatsMetrics]:
-            state = None
+            state: AudioStatsMetrics | None = None
             def get() -> AudioStatsMetrics:
                 nonlocal state
@@ -1480,7 +1624,7 @@ class MixtureDatabase:
         target_stats = create_target_stats()
         def create_noise_stats() -> Callable[[], AudioStatsMetrics]:
-            state = None
+            state: AudioStatsMetrics | None = None
             def get() -> AudioStatsMetrics:
                 nonlocal state
@@ -1508,6 +1652,21 @@ class MixtureDatabase:
         asr_config = create_asr_config()
+        def create_targets_asr() -> Callable[[str], list[str]]:
+            state: dict[str, list[str]] = {}
+            def get(asr_name) -> list[str]:
+                nonlocal state
+                if asr_name not in state:
+                    state[asr_name] = []
+                    for audio in targets_audio():
+                        state[asr_name].append(calc_asr(audio, **asr_config(asr_name)).text)
+                return state[asr_name]
+            return get
+        targets_asr = create_targets_asr()
         def create_target_asr() -> Callable[[str], str]:
             state: dict[str, str] = {}
@@ -1541,7 +1700,7 @@ class MixtureDatabase:
             asr_name = parts[1]
             return asr_name
-        def calc(m: str) -> float | int | str | Segsnr | None:
+        def calc(m: str) -> Any:
             if m == "mxsnr":
                 return self.mixture(m_id).snr
@@ -1555,7 +1714,7 @@ class MixtureDatabase:
             if m.startswith("mxwer"):
                 asr_name = get_asr_name(m)
-                if self.mixture(m_id).snr < -96:
+                if self.mixture(m_id).is_noise_only:
                     # noise only, ignore/reset target asr
                     return float("nan")
@@ -1569,11 +1728,11 @@ class MixtureDatabase:
                 asr_name = get_asr_name(m)
                 text = self.mixture_speech_metadata(m_id, "text")[0]
-                if text is not None:
-                    return calc_wer(target_asr(asr_name), text).wer * 100
+                if not isinstance(text, str):
+                    # TODO: should this be NaN like above?
+                    return [float(0)] * len(targets_audio())
-                # TODO: should this be NaN like above?
-                return float(0)
+                return [calc_wer(t, text).wer * 100 for t in targets_asr(asr_name)]
             if m.startswith("mxasr"):
                 return mixture_asr(get_asr_name(m))
@@ -1603,24 +1762,24 @@ class MixtureDatabase:
                 return calc_segsnr_f_bin(target_f(), noise_f()).db_std
             if m == "mxpesq":
-                if self.mixture(m_id).snr < -96:
-                    return 0
-                return speech().pesq
+                if self.mixture(m_id).is_noise_only:
+                    return [0] * len(speech())
+                return [s.pesq for s in speech()]
             if m == "mxcsig":
-                if self.mixture(m_id).snr < -96:
-                    return 0
-                return speech().csig
+                if self.mixture(m_id).is_noise_only:
+                    return [0] * len(speech())
+                return [s.csig for s in speech()]
             if m == "mxcbak":
-                if self.mixture(m_id).snr < -96:
-                    return 0
-                return speech().cbak
+                if self.mixture(m_id).is_noise_only:
+                    return [0] * len(speech())
+                return [s.cbak for s in speech()]
             if m == "mxcovl":
-                if self.mixture(m_id).snr < -96:
-                    return 0
-                return speech().covl
+                if self.mixture(m_id).is_noise_only:
+                    return [0] * len(speech())
+                return [s.covl for s in speech()]
             if m == "mxwsdr":
                 mixture = mixture_audio()[:, np.newaxis]
@@ -1644,37 +1803,70 @@ class MixtureDatabase:
                     extended=False,
                 )
-            if m == "tdco":
+            if m == "mxtdco":
                 return target_stats().dco
-            if m == "tmin":
+            if m == "mxtmin":
                 return target_stats().min
-            if m == "tmax":
+            if m == "mxtmax":
                 return target_stats().max
-            if m == "tpkdb":
+            if m == "mxtpkdb":
                 return target_stats().pkdb
-            if m == "tlrms":
+            if m == "mxtlrms":
                 return target_stats().lrms
-            if m == "tpkr":
+            if m == "mxtpkr":
                 return target_stats().pkr
-            if m == "ttr":
+            if m == "mxttr":
                 return target_stats().tr
-            if m == "tcr":
+            if m == "mxtcr":
                 return target_stats().cr
-            if m == "tfl":
+            if m == "mxtfl":
                 return target_stats().fl
-            if m == "tpkc":
+            if m == "mxtpkc":
                 return target_stats().pkc
+            if m == "tdco":
+                return [t.dco for t in targets_stats()]
+            if m == "tmin":
+                return [t.min for t in targets_stats()]
+            if m == "tmax":
+                return [t.max for t in targets_stats()]
+            if m == "tpkdb":
+                return [t.pkdb for t in targets_stats()]
+            if m == "tlrms":
+                return [t.lrms for t in targets_stats()]
+            if m == "tpkr":
+                return [t.pkr for t in targets_stats()]
+            if m == "ttr":
+                return [t.tr for t in targets_stats()]
+            if m == "tcr":
+                return [t.cr for t in targets_stats()]
+            if m == "tfl":
+                return [t.fl for t in targets_stats()]
+            if m == "tpkc":
+                return [t.pkc for t in targets_stats()]
             if m.startswith("tasr"):
+                return targets_asr(get_asr_name(m))
+            if m.startswith("mxtasr"):
                 return target_asr(get_asr_name(m))
             if m == "ndco":
@@ -1743,7 +1935,14 @@ def _spectral_mask(db: partial, sm_id: int) -> SpectralMask:
     with db() as c:
         spectral_mask = SpectralMaskRecord(
-            *c.execute("SELECT * FROM spectral_mask WHERE ? = spectral_mask.id", (sm_id,)).fetchone()
+            *c.execute(
+                """
+                SELECT *
+                FROM spectral_mask
+                WHERE ? = spectral_mask.id
+                """,
+                (sm_id,),
+            ).fetchone()
         )
         return SpectralMask(
             f_max_width=spectral_mask.f_max_width,
@@ -1768,7 +1967,14 @@ def _target_file(db: partial, t_id: int) -> TargetFile:
     with db() as c:
         target_file = TargetFileRecord(
-            *c.execute("SELECT * FROM target_file WHERE ? = target_file.id", (t_id,)).fetchone()
+            *c.execute(
+                """
+                SELECT *
+                FROM target_file
+                WHERE ? = target_file.id
+                """,
+                (t_id,),
+            ).fetchone()
         )
         return TargetFile(
@@ -1791,7 +1997,11 @@ def _noise_file(db: partial, n_id: int) -> NoiseFile:
     """
     with db() as c:
         noise = c.execute(
-            "SELECT noise_file.name, samples FROM noise_file WHERE ? = noise_file.id",
+            """
+            SELECT noise_file.name, samples
+            FROM noise_file
+            WHERE ? = noise_file.id
+            """,
             (n_id,),
         ).fetchone()
         return NoiseFile(name=noise[0], samples=noise[1])
@@ -1808,7 +2018,11 @@ def _impulse_response_file(db: partial, ir_id: int) -> str:
     with db() as c:
         return str(
             c.execute(
-                "SELECT impulse_response_file.file FROM impulse_response_file WHERE ? = impulse_response_file.id",
+                """
+                SELECT impulse_response_file.file
+                FROM impulse_response_file
+                WHERE ? = impulse_response_file.id
+                """,
                 (ir_id + 1,),
             ).fetchone()[0]
         )
@@ -1828,13 +2042,25 @@ def _mixture(db: partial, m_id: int) -> Mixture:
     from .helpers import to_target
     with db() as c:
-        mixture = MixtureRecord(*c.execute("SELECT * FROM mixture WHERE ? = mixture.id", (m_id + 1,)).fetchone())
+        mixture = MixtureRecord(
+            *c.execute(
+                """
+                SELECT *
+                FROM mixture
+                WHERE ? = mixture.id
+                """,
+                (m_id + 1,),
+            ).fetchone()
+        )
         targets = [
             to_target(TargetRecord(*target))
             for target in c.execute(
-                "SELECT target.* "
-                + "FROM target, mixture_target "
-                + "WHERE ? = mixture_target.mixture_id AND target.id = mixture_target.target_id",
+                """
+                SELECT target.*
+                FROM target, mixture_target
+                WHERE ? = mixture_target.mixture_id AND target.id = mixture_target.target_id
+                """,
                 (mixture.id,),
             ).fetchall()
         ]
@@ -1865,10 +2091,11 @@ def _target_truth_configs(db: partial, t_id: int) -> TruthConfigs:
     truth_configs: TruthConfigs = {}
     with db() as c:
         for truth_config_record in c.execute(
-            "SELECT truth_config.config "
-            + "FROM truth_config, target_file_truth_config "
-            + "WHERE ? = target_file_truth_config.target_file_id "
-            + "AND truth_config.id = target_file_truth_config.truth_config_id",
+            """
+            SELECT truth_config.config
+            FROM truth_config, target_file_truth_config
+            WHERE ? = target_file_truth_config.target_file_id AND truth_config.id = target_file_truth_config.truth_config_id
+            """,
             (t_id,),
         ).fetchall():
             truth_config = json.loads(truth_config_record[0])

sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl

sonusai 0.19.6__py3-none-any.whl → 0.19.9__py3-none-any.whl