PyPI - sonusai - Versions diffs - 0.18.2__py3-none-any.whl → 0.18.5__py3-none-any.whl - Mend

sonusai 0.18.2__py3-none-any.whl → 0.18.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

sonusai/__init__.py +1 -0
sonusai/audiofe.py +1 -1
sonusai/calc_metric_spenh.py +32 -362
sonusai/data/genmixdb.yml +2 -0
sonusai/doc/doc.py +45 -4
sonusai/genmetrics.py +137 -109
sonusai/lsdb.py +2 -2
sonusai/metrics/__init__.py +4 -0
sonusai/metrics/calc_audio_stats.py +42 -0
sonusai/metrics/calc_pesq.py +12 -8
sonusai/metrics/calc_phase_distance.py +43 -0
sonusai/metrics/calc_snr_f.py +34 -0
sonusai/metrics/calc_speech.py +312 -0
sonusai/metrics/calc_wer.py +2 -3
sonusai/metrics/calc_wsdr.py +0 -59
sonusai/mixture/__init__.py +3 -2
sonusai/mixture/audio.py +6 -5
sonusai/mixture/config.py +13 -0
sonusai/mixture/constants.py +1 -0
sonusai/mixture/datatypes.py +33 -0
sonusai/mixture/generation.py +6 -2
sonusai/mixture/mixdb.py +263 -122
sonusai/mixture/soundfile_audio.py +8 -6
sonusai/mixture/sox_audio.py +16 -13
sonusai/mixture/torchaudio_audio.py +6 -4
sonusai/mixture/truth_functions/energy.py +40 -28
sonusai/mixture/truth_functions/target.py +0 -1
sonusai/utils/__init__.py +1 -1
sonusai/utils/asr.py +26 -39
sonusai/utils/asr_functions/aaware_whisper.py +3 -3
{sonusai-0.18.2.dist-info → sonusai-0.18.5.dist-info}/METADATA +1 -1
{sonusai-0.18.2.dist-info → sonusai-0.18.5.dist-info}/RECORD +34 -31
sonusai/mixture/mapped_snr_f.py +0 -100
{sonusai-0.18.2.dist-info → sonusai-0.18.5.dist-info}/WHEEL +0 -0
{sonusai-0.18.2.dist-info → sonusai-0.18.5.dist-info}/entry_points.txt +0 -0

sonusai/genmetrics.py CHANGED Viewed

@@ -1,146 +1,174 @@
-# Generate mixdb metrics based on metrics listed in config.yml
+"""sonusai genmetrics
+usage: genmetrics [-hvs] [-i MIXID] [-n INCLUDE] [-x EXCLUDE] LOC
-class MixtureMetrics:
-    @property
-    def mxsnr(self):
-        ...
+options:
+    -h, --help
+    -v, --verbose                   Be verbose.
+    -i MIXID, --mixid MIXID         Mixture ID(s) to generate. [default: *].
+    -n INCLUDE, --include INCLUDE   Metrics to include. [default: all]
+    -x EXCLUDE, --exclude EXCLUDE   Metrics to exclude. [default: none]
+    -s, --supported                 Show list of supported metrics.
-    @property
-    def mxssnravg(self):
-        ...
+Calculate speech enhancement metrics of SonusAI mixture data in LOC.
-    @property
-    def mxssnrstd(self):
-        ...
+Inputs:
+    LOC         A SonusAI mixture database directory.
+    MIXID       A glob of mixture ID(s) to generate.
+    INCLUDE     Comma separated list of metrics to include. Can be 'all' or
+                any of the supported metrics.
+    EXCLUDE     Comma separated list of metrics to exclude. Can be 'none' or
+                any of the supported metrics.
-    @property
-    def mxssnrdavg(self):
-        ...
+Examples:
-    @property
-    def mxssnrdstd(self):
-        ...
+Generate all available mxwer metrics (as determined by mixdb asr_configs parameter):
+> sonusai genmetrics -n"mxwer" mixdb_loc
-    @property
-    def mxpesq(self):
-        ...
+Generate only mxwer.faster metrics:
+> sonusai genmetrics -n"mxwer.faster" mixdb_loc
-    @property
-    def mxwsdr(self):
-        ...
+Generate all available metrics except for mxwer.faster:
+> sonusai genmetrics -x"mxwer.faster" mixdb_loc
-    @property
-    def mxpd(self):
-        ...
+"""
+import signal
+from dataclasses import dataclass
-    @property
-    def mxstoi(self):
-        ...
+from sonusai.mixture import MixtureDatabase
-    @property
-    def mxcsig(self):
-        ...
-    @property
-    def mxcbak(self):
-        ...
+def signal_handler(_sig, _frame):
+    import sys
-    @property
-    def mxcovl(self):
-        ...
+    from sonusai import logger
-    def mxwer(self, engine: str, model: str):
-        ...
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
-    @property
-    def tdco(self):
-        ...
-    @property
-    def tmin(self):
-        ...
+signal.signal(signal.SIGINT, signal_handler)
-    @property
-    def tmax(self):
-        ...
-    @property
-    def tpkdb(self):
-        ...
+@dataclass
+class MPGlobal:
+    mixdb: MixtureDatabase = None
+    metrics: set[str] = None
-    @property
-    def tlrms(self):
-        ...
-    @property
-    def tpkr(self):
-        ...
+MP_GLOBAL = MPGlobal()
-    @property
-    def ttr(self):
-        ...
-    @property
-    def tcr(self):
-        ...
+def _initializer(location: str, metrics: set[str]) -> None:
+    MP_GLOBAL.mixdb = MixtureDatabase(location)
+    MP_GLOBAL.metrics = metrics
-    @property
-    def tfl(self):
-        ...
-    @property
-    def tpkc(self):
-        ...
+def _process_mixture(mixid: int) -> None:
+    from sonusai.mixture import write_mixture_data
-    @property
-    def ndco(self):
-        ...
+    mixdb = MP_GLOBAL.mixdb
+    metrics = list(MP_GLOBAL.metrics)
-    @property
-    def nmin(self):
-        ...
+    values = mixdb.mixture_metrics(m_id=mixid, metrics=metrics, force=True)
+    write_data = list(zip(metrics, values))
-    @property
-    def nmax(self):
-        ...
+    write_mixture_data(mixdb, mixdb.mixture(mixid), write_data)
-    @property
-    def npkdb(self):
-        ...
-    @property
-    def nlrms(self):
-        ...
+def main() -> None:
+    from docopt import docopt
-    @property
-    def npkr(self):
-        ...
+    import sonusai
+    from sonusai.utils import trim_docstring
-    @property
-    def ntr(self):
-        ...
+    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
-    @property
-    def ncr(self):
-        ...
+    verbose = args['--verbose']
+    mixids = args['--mixid']
+    includes = [x.strip() for x in args['--include'].lower().split(',')]
+    excludes = [x.strip() for x in args['--exclude'].lower().split(',')]
+    show_supported = args['--supported']
+    location = args['LOC']
-    @property
-    def nfl(self):
-        ...
+    import sys
+    import time
+    from os.path import join
-    @property
-    def npkc(self):
-        ...
+    from sonusai import create_file_handler
+    from sonusai import initial_log_messages
+    from sonusai import logger
+    from sonusai import update_console_handler
+    from sonusai.utils import pp_tqdm_imap
+    from sonusai.utils import seconds_to_hms
+    from tqdm import tqdm
-    @property
-    def sedavg(self):
-        ...
+    start_time = time.monotonic()
-    @property
-    def sedcnt(self):
-        ...
+    # Setup logging file
+    create_file_handler(join(location, 'genmetrics.log'))
+    update_console_handler(verbose)
+    initial_log_messages('genmetrics')
-    @property
-    def sedtopn(self):
-        ...
+    logger.info(f'Load mixture database from {location}')
+    mixdb = MixtureDatabase(location)
+    supported = mixdb.supported_metrics
+    if show_supported:
+        logger.info(f'\nSupported metrics: {", ".join(sorted(supported))}')
+        sys.exit(0)
+    if includes is None or 'all' in includes:
+        metrics = supported
+    else:
+        metrics = set(includes)
+        if 'mxwer' in metrics:
+            metrics.remove('mxwer')
+            for name in mixdb.asr_configs:
+                metrics.add(f'mxwer.{name}')
+    diff = metrics.difference(supported)
+    if diff:
+        logger.error(f'Unrecognized metric: {", ".join(diff)}')
+        sys.exit(1)
+    if excludes is None or 'none' in excludes:
+        _excludes = set([])
+    else:
+        _excludes = set(excludes)
+        if 'mxwer' in _excludes:
+            _excludes.remove('mxwer')
+            for name in mixdb.asr_configs:
+                _excludes.add(f'mxwer.{name}')
+    diff = _excludes.difference(supported)
+    if diff:
+        logger.error(f'Unrecognized metric: {", ".join(diff)}')
+        sys.exit(1)
+    for exclude in _excludes:
+        metrics.discard(exclude)
+    logger.info(f'Generating metrics: {", ".join(metrics)}')
+    mixids = mixdb.mixids_to_list(mixids)
+    logger.info('')
+    logger.info(f'Found {len(mixids):,} mixtures to process')
+    progress = tqdm(total=len(mixids), desc='genmetrics')
+    pp_tqdm_imap(_process_mixture, mixids,
+                 progress=progress,
+                 initializer=_initializer,
+                 initargs=(location, metrics))
+    progress.close()
+    logger.info(f'Wrote metrics for {len(mixids)} mixtures to {location}')
+    logger.info('')
+    end_time = time.monotonic()
+    logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
+    logger.info('')
+if __name__ == '__main__':
+    main()

sonusai/lsdb.py CHANGED Viewed

@@ -43,7 +43,7 @@ def lsdb(mixdb: MixtureDatabase,
     import h5py
     from sonusai import SonusAIError
-    from sonusai.mixture import calculate_snr_f_statistics
+    from sonusai.metrics import calc_snr_f
     from sonusai.mixture import SAMPLE_RATE
     from sonusai.mixture import get_truth_indices_for_target
     from sonusai.queries import get_mixids_from_truth_index
@@ -113,7 +113,7 @@ def lsdb(mixdb: MixtureDatabase,
                 else:
                     truth_f = np.concatenate((truth_f, np.array(f['truth_f'])))
-        snr_mean, snr_std, snr_db_mean, snr_db_std = calculate_snr_f_statistics(truth_f)
+        snr_mean, snr_std, snr_db_mean, snr_db_std = calc_snr_f(truth_f)
         logger.info('Truth')
         logger.info(f'  {"mean":^8s}  {"std":^8s}  {"db_mean":^8s}  {"db_std":^8s}')

sonusai/metrics/__init__.py CHANGED Viewed

@@ -1,11 +1,15 @@
 # SonusAI metrics utilities for model training and validation
+from .calc_audio_stats import calc_audio_stats
 from .calc_class_weights import calc_class_weights_from_mixdb
 from .calc_class_weights import calc_class_weights_from_truth
 from .calc_optimal_thresholds import calc_optimal_thresholds
 from .calc_pcm import calc_pcm
 from .calc_pesq import calc_pesq
+from .calc_phase_distance import calc_phase_distance
 from .calc_sa_sdr import calc_sa_sdr
 from .calc_sample_weights import calc_sample_weights
+from .calc_snr_f import calc_snr_f
+from .calc_speech import calc_speech
 from .calc_wer import calc_wer
 from .calc_wsdr import calc_wsdr
 from .class_summary import class_summary

sonusai/metrics/calc_audio_stats.py ADDED Viewed

@@ -0,0 +1,42 @@
+from sonusai.mixture.datatypes import AudioStatsMetrics
+from sonusai.mixture.datatypes import AudioT
+def calc_audio_stats(audio: AudioT, win_len: float = None) -> AudioStatsMetrics:
+    from sonusai.mixture import SAMPLE_RATE
+    from sonusai.mixture import Transformer
+    args = ['stats']
+    if win_len is not None:
+        args.extend(['-w', str(win_len)])
+    tfm = Transformer()
+    _, _, out = tfm.build(input_array=audio,
+                          sample_rate_in=SAMPLE_RATE,
+                          output_filepath='-n',
+                          extra_args=args,
+                          return_output=True)
+    stats = {}
+    lines = out.split('\n')
+    for line in lines:
+        split_line = line.split()
+        if len(split_line) == 0:
+            continue
+        value = split_line[-1]
+        key = ' '.join(split_line[:-1])
+        stats[key] = value
+    return AudioStatsMetrics(
+        dco=float(stats['DC offset']),
+        min=float(stats['Min level']),
+        max=float(stats['Max level']),
+        pkdb=float(stats['Pk lev dB']),
+        lrms=float(stats['RMS lev dB']),
+        pkr=float(stats['RMS Pk dB']),
+        tr=float(stats['RMS Tr dB']),
+        cr=float(stats['Crest factor']),
+        fl=float(stats['Flat factor']),
+        pkc=int(stats['Pk count']),
+    )

sonusai/metrics/calc_pesq.py CHANGED Viewed

@@ -1,14 +1,20 @@
 import numpy as np
+from sonusai.mixture.constants import SAMPLE_RATE
-def calc_pesq(hypothesis: np.ndarray, reference: np.ndarray, error_value: float = 0.0) -> float:
-    """Computes the PESQ score of speech estimate audio vs. the clean speech estimate audio
+def calc_pesq(hypothesis: np.ndarray,
+              reference: np.ndarray,
+              error_value: float = 0.0,
+              sample_rate: int = SAMPLE_RATE) -> float:
+    """Computes the PESQ score of hypothesis vs. reference
     Upon error, assigns a value of 0, or user specified value in error_value
-    :param hypothesis: speech estimated audio
-    :param reference: speech reference audio
-    :param error_value:
+    :param hypothesis: estimated audio
+    :param reference: reference audio
+    :param error_value: value to use if error occurs
+    :param sample_rate: sample rate of audio
     :return: value between -0.5 to 4.5
     """
     import warnings
@@ -16,12 +22,10 @@ def calc_pesq(hypothesis: np.ndarray, reference: np.ndarray, error_value: float
     from pesq import pesq
     from sonusai import logger
-    from sonusai.mixture import SAMPLE_RATE
     try:
         with warnings.catch_warnings():
             warnings.simplefilter('ignore')
-            score = pesq(SAMPLE_RATE, reference, hypothesis, mode='wb')
+            score = pesq(fs=sample_rate, ref=reference, deg=hypothesis, mode='wb')
     except Exception as e:
         logger.debug(f'PESQ error {e}')
         score = error_value

sonusai/metrics/calc_phase_distance.py ADDED Viewed

@@ -0,0 +1,43 @@
+import numpy as np
+def calc_phase_distance(reference: np.ndarray,
+                        hypothesis: np.ndarray,
+                        eps: float = 1e-9) -> tuple[float, np.ndarray, np.ndarray]:
+    """Calculate weighted phase distance error (weight normalization over bins per frame)
+    :param reference: complex [frames, bins]
+    :param hypothesis: complex [frames, bins]
+    :param eps: epsilon value
+    :return: mean, mean per bin, mean per frame
+    """
+    ang_diff = np.angle(reference) - np.angle(hypothesis)
+    phd_mod = (ang_diff + np.pi) % (2 * np.pi) - np.pi
+    rh_angle_diff = phd_mod * 180 / np.pi  # angle diff in deg
+    # Use complex divide to intrinsically keep angle diff +/-180 deg, but avoid div by zero (real hyp)
+    # hyp_real = np.real(hypothesis)
+    # near_zeros = np.real(hyp_real) < eps
+    # hyp_real = hyp_real * (np.logical_not(near_zeros))
+    # hyp_real = hyp_real + (near_zeros * eps)
+    # hypothesis = hyp_real + 1j*np.imag(hypothesis)
+    # rh_angle_diff = np.angle(reference / hypothesis) * 180 / np.pi  # angle diff +/-180
+    # weighted mean over all (scalar)
+    reference_mag = np.abs(reference)
+    ref_weight = reference_mag / (np.sum(reference_mag) + eps)  # frames x bins
+    err = np.around(np.sum(ref_weight * rh_angle_diff), 3)
+    # weighted mean over frames (value per bin)
+    err_b = np.zeros(reference.shape[1])
+    for bi in range(reference.shape[1]):
+        ref_weight = reference_mag[:, bi] / (np.sum(reference_mag[:, bi], axis=0) + eps)
+        err_b[bi] = np.around(np.sum(ref_weight * rh_angle_diff[:, bi]), 3)
+    # weighted mean over bins (value per frame)
+    err_f = np.zeros(reference.shape[0])
+    for fi in range(reference.shape[0]):
+        ref_weight = reference_mag[fi, :] / (np.sum(reference_mag[fi, :]) + eps)
+        err_f[fi] = np.around(np.sum(ref_weight * rh_angle_diff[fi, :]), 3)
+    return err, err_b, err_f

sonusai/metrics/calc_snr_f.py ADDED Viewed

@@ -0,0 +1,34 @@
+import numpy as np
+from sonusai.mixture.datatypes import Segsnr
+from sonusai.mixture.datatypes import SnrFMetrics
+def calc_snr_f(segsnr_f: Segsnr) -> SnrFMetrics:
+    """Calculate metrics of snr_f truth data.
+    For now, includes mean and variance of the raw values (usually energy)
+    and mean and standard deviation of the dB values (10 * log10).
+    """
+    if np.count_nonzero(segsnr_f) == 0:
+        # If all entries are zeros
+        return SnrFMetrics(0, 0, -np.inf, 0)
+    tmp = np.ma.array(segsnr_f, mask=np.logical_not(np.isfinite(segsnr_f)), dtype=np.float32)
+    if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+        # If all entries are infinite
+        return SnrFMetrics(np.inf, 0, np.inf, 0)
+    snr_mean = np.mean(tmp, axis=0)
+    snr_var = np.var(tmp, axis=0)
+    tmp = 10 * np.ma.log10(tmp)
+    if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+        # If all entries are masked, special case where all inputs are either 0 or infinite
+        snr_db_mean = -np.inf
+        snr_db_std = np.inf
+    else:
+        snr_db_mean = np.mean(tmp, axis=0)
+        snr_db_std = np.std(tmp, axis=0)
+    return SnrFMetrics(snr_mean, snr_var, snr_db_mean, snr_db_std)

sonusai 0.18.2__py3-none-any.whl → 0.18.5__py3-none-any.whl

sonusai 0.18.2__py3-none-any.whl → 0.18.5__py3-none-any.whl