PyPI - sonusai - Versions diffs - 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl - Mend

sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

sonusai/__init__.py +20 -29
sonusai/aawscd_probwrite.py +18 -18
sonusai/audiofe.py +93 -80
sonusai/calc_metric_spenh.py +395 -321
sonusai/data/genmixdb.yml +5 -11
sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
sonusai/{plot.py → deprecated/plot.py} +177 -131
sonusai/{tplot.py → deprecated/tplot.py} +124 -102
sonusai/doc/__init__.py +1 -1
sonusai/doc/doc.py +112 -177
sonusai/doc.py +10 -10
sonusai/genft.py +93 -77
sonusai/genmetrics.py +59 -46
sonusai/genmix.py +116 -104
sonusai/genmixdb.py +194 -153
sonusai/lsdb.py +56 -66
sonusai/main.py +23 -20
sonusai/metrics/__init__.py +2 -0
sonusai/metrics/calc_audio_stats.py +29 -24
sonusai/metrics/calc_class_weights.py +7 -7
sonusai/metrics/calc_optimal_thresholds.py +5 -7
sonusai/metrics/calc_pcm.py +3 -3
sonusai/metrics/calc_pesq.py +10 -7
sonusai/metrics/calc_phase_distance.py +3 -3
sonusai/metrics/calc_sa_sdr.py +10 -8
sonusai/metrics/calc_segsnr_f.py +15 -17
sonusai/metrics/calc_speech.py +105 -47
sonusai/metrics/calc_wer.py +35 -32
sonusai/metrics/calc_wsdr.py +10 -7
sonusai/metrics/class_summary.py +30 -27
sonusai/metrics/confusion_matrix_summary.py +25 -22
sonusai/metrics/one_hot.py +91 -57
sonusai/metrics/snr_summary.py +53 -46
sonusai/mixture/__init__.py +19 -14
sonusai/mixture/audio.py +4 -6
sonusai/mixture/augmentation.py +37 -43
sonusai/mixture/class_count.py +5 -14
sonusai/mixture/config.py +292 -225
sonusai/mixture/constants.py +41 -30
sonusai/mixture/data_io.py +155 -0
sonusai/mixture/datatypes.py +111 -108
sonusai/mixture/db_datatypes.py +54 -70
sonusai/mixture/eq_rule_is_valid.py +6 -9
sonusai/mixture/feature.py +50 -46
sonusai/mixture/generation.py +522 -389
sonusai/mixture/helpers.py +217 -272
sonusai/mixture/log_duration_and_sizes.py +16 -13
sonusai/mixture/mixdb.py +677 -473
sonusai/mixture/soundfile_audio.py +12 -17
sonusai/mixture/sox_audio.py +91 -112
sonusai/mixture/sox_augmentation.py +8 -9
sonusai/mixture/spectral_mask.py +4 -6
sonusai/mixture/target_class_balancing.py +41 -36
sonusai/mixture/targets.py +69 -67
sonusai/mixture/tokenized_shell_vars.py +23 -23
sonusai/mixture/torchaudio_audio.py +14 -15
sonusai/mixture/torchaudio_augmentation.py +23 -27
sonusai/mixture/truth.py +48 -26
sonusai/mixture/truth_functions/__init__.py +26 -0
sonusai/mixture/truth_functions/crm.py +56 -38
sonusai/mixture/truth_functions/datatypes.py +37 -0
sonusai/mixture/truth_functions/energy.py +85 -59
sonusai/mixture/truth_functions/file.py +30 -30
sonusai/mixture/truth_functions/phoneme.py +14 -7
sonusai/mixture/truth_functions/sed.py +71 -45
sonusai/mixture/truth_functions/target.py +69 -106
sonusai/mkwav.py +52 -85
sonusai/onnx_predict.py +46 -43
sonusai/queries/__init__.py +3 -1
sonusai/queries/queries.py +100 -59
sonusai/speech/__init__.py +2 -0
sonusai/speech/l2arctic.py +24 -23
sonusai/speech/librispeech.py +16 -17
sonusai/speech/mcgill.py +22 -21
sonusai/speech/textgrid.py +32 -25
sonusai/speech/timit.py +45 -42
sonusai/speech/vctk.py +14 -13
sonusai/speech/voxceleb.py +26 -20
sonusai/summarize_metric_spenh.py +11 -10
sonusai/utils/__init__.py +4 -3
sonusai/utils/asl_p56.py +1 -1
sonusai/utils/asr.py +37 -17
sonusai/utils/asr_functions/__init__.py +2 -0
sonusai/utils/asr_functions/aaware_whisper.py +18 -12
sonusai/utils/audio_devices.py +12 -12
sonusai/utils/braced_glob.py +6 -8
sonusai/utils/calculate_input_shape.py +1 -4
sonusai/utils/compress.py +2 -2
sonusai/utils/convert_string_to_number.py +1 -3
sonusai/utils/create_timestamp.py +1 -1
sonusai/utils/create_ts_name.py +2 -2
sonusai/utils/dataclass_from_dict.py +1 -1
sonusai/utils/docstring.py +6 -6
sonusai/utils/energy_f.py +9 -7
sonusai/utils/engineering_number.py +56 -54
sonusai/utils/get_label_names.py +8 -10
sonusai/utils/human_readable_size.py +2 -2
sonusai/utils/model_utils.py +3 -5
sonusai/utils/numeric_conversion.py +2 -4
sonusai/utils/onnx_utils.py +43 -32
sonusai/utils/parallel.py +40 -27
sonusai/utils/print_mixture_details.py +25 -22
sonusai/utils/ranges.py +12 -12
sonusai/utils/read_predict_data.py +11 -9
sonusai/utils/reshape.py +19 -26
sonusai/utils/seconds_to_hms.py +1 -1
sonusai/utils/stacked_complex.py +8 -16
sonusai/utils/stratified_shuffle_split.py +29 -27
sonusai/utils/write_audio.py +2 -2
sonusai/utils/yes_or_no.py +3 -3
sonusai/vars.py +14 -14
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
sonusai-0.19.5.dist-info/RECORD +125 -0
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
sonusai/mixture/truth_functions/data.py +0 -58
sonusai/utils/read_mixture_data.py +0 -14
sonusai-0.18.8.dist-info/RECORD +0 -125
{sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0

sonusai/metrics/calc_speech.py CHANGED Viewed

@@ -2,12 +2,13 @@ import numpy as np
 from sonusai.mixture.constants import SAMPLE_RATE
 from sonusai.mixture.datatypes import SpeechMetrics
 from .calc_pesq import calc_pesq
 def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE) -> SpeechMetrics:
     """Calculate speech metrics pesq, c_sig, c_bak, and c_ovl.
     These are all related and thus included in one function. Reference: matlab script "compute_metrics.m".
     :param hypothesis: estimated audio
@@ -22,7 +23,7 @@ def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int
     # Value from CMGAN reference implementation
     alpha = 0.95
-    wss_dist = np.mean(wss_dist_vec[0: round(np.size(wss_dist_vec) * alpha)])
+    wss_dist = np.mean(wss_dist_vec[0 : round(np.size(wss_dist_vec) * alpha)])
     # Log likelihood ratio measure
     llr_dist = _calc_log_likelihood_ratio_measure(hypothesis=hypothesis, reference=reference, sample_rate=sample_rate)
@@ -45,16 +46,16 @@ def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int
     return SpeechMetrics(_pesq, csig, cbak, covl)
-def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
-                                          reference: np.ndarray,
-                                          sample_rate: int = SAMPLE_RATE) -> np.ndarray:
+def _calc_weighted_spectral_slope_measure(
+    hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
+) -> np.ndarray:
     from scipy.fftpack import fft
     # The lengths of the reference and hypothesis must be the same.
     reference_length = np.size(reference)
     hypothesis_length = np.size(hypothesis)
     if reference_length != hypothesis_length:
-        raise ValueError('Hypothesis and reference must be the same length.')
+        raise ValueError("Hypothesis and reference must be the same length.")
     # Window length in samples
     win_length = int(np.round(30 * sample_rate / 1000))
@@ -72,14 +73,64 @@ def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
     k_loc_max = 1.0
     # Critical band filter definitions (center frequency and bandwidths in Hz)
-    cent_freq = np.array([50.0000, 120.000, 190.000, 260.000, 330.000, 400.000, 470.000,
-                          540.000, 617.372, 703.378, 798.717, 904.128, 1020.38, 1148.30,
-                          1288.72, 1442.54, 1610.70, 1794.16, 1993.93, 2211.08, 2446.71,
-                          2701.97, 2978.04, 3276.17, 3597.63])
-    bandwidth = np.array([70.0000, 70.0000, 70.0000, 70.0000, 70.0000, 70.0000, 70.0000,
-                          77.3724, 86.0056, 95.3398, 105.411, 116.256, 127.914, 140.423,
-                          153.823, 168.154, 183.457, 199.776, 217.153, 235.631, 255.255,
-                          276.072, 298.126, 321.465, 346.136])
+    cent_freq = np.array(
+        [
+            50.0000,
+            120.000,
+            190.000,
+            260.000,
+            330.000,
+            400.000,
+            470.000,
+            540.000,
+            617.372,
+            703.378,
+            798.717,
+            904.128,
+            1020.38,
+            1148.30,
+            1288.72,
+            1442.54,
+            1610.70,
+            1794.16,
+            1993.93,
+            2211.08,
+            2446.71,
+            2701.97,
+            2978.04,
+            3276.17,
+            3597.63,
+        ]
+    )
+    bandwidth = np.array(
+        [
+            70.0000,
+            70.0000,
+            70.0000,
+            70.0000,
+            70.0000,
+            70.0000,
+            70.0000,
+            77.3724,
+            86.0056,
+            95.3398,
+            105.411,
+            116.256,
+            127.914,
+            140.423,
+            153.823,
+            168.154,
+            183.457,
+            199.776,
+            217.153,
+            235.631,
+            255.255,
+            276.072,
+            298.126,
+            321.465,
+            346.136,
+        ]
+    )
     # Minimum critical bandwidth
     bw_min = bandwidth[0]
@@ -109,8 +160,8 @@ def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
     distortion = np.empty(num_frames)
     for frame_count in range(num_frames):
         # (1) Get the frames for the test and reference speech. Multiply by Hanning window.
-        reference_frame = reference[start: start + win_length] / 32768
-        hypothesis_frame = hypothesis[start: start + win_length] / 32768
+        reference_frame = reference[start : start + win_length] / 32768
+        hypothesis_frame = hypothesis[start : start + win_length] / 32768
         reference_frame = np.multiply(reference_frame, window)
         hypothesis_frame = np.multiply(hypothesis_frame, window)
@@ -122,12 +173,12 @@ def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
         reference_energy = np.matmul(crit_filter, reference_spec[0:n_fft_by_2])
         hypothesis_energy = np.matmul(crit_filter, hypothesis_spec[0:n_fft_by_2])
-        reference_energy = 10 * np.log10(np.maximum(reference_energy, 1E-10))
-        hypothesis_energy = 10 * np.log10(np.maximum(hypothesis_energy, 1E-10))
+        reference_energy = 10 * np.log10(np.maximum(reference_energy, 1e-10))
+        hypothesis_energy = 10 * np.log10(np.maximum(hypothesis_energy, 1e-10))
         # (4) Compute spectral slope (dB[i+1]-dB[i])
-        reference_slope = reference_energy[1:num_crit] - reference_energy[0: num_crit - 1]
-        hypothesis_slope = hypothesis_energy[1:num_crit] - hypothesis_energy[0: num_crit - 1]
+        reference_slope = reference_energy[1:num_crit] - reference_energy[0 : num_crit - 1]
+        hypothesis_slope = hypothesis_energy[1:num_crit] - hypothesis_energy[0 : num_crit - 1]
         # (5) Find the nearest peak locations in the spectra to each critical band.
         #     If the slope is negative, we search to the left. If positive, we search to the right.
@@ -173,17 +224,22 @@ def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
         # and less emphasis on slope differences in spectral valleys.
         # This procedure is described on page 1280 of Klatt's 1982 ICASSP paper.
-        w_max_reference = np.divide(k_max, k_max + db_max_reference - reference_energy[0: num_crit - 1])
-        w_loc_max_reference = np.divide(k_loc_max, k_loc_max + reference_loc_peak - reference_energy[0: num_crit - 1])
+        w_max_reference = np.divide(k_max, k_max + db_max_reference - reference_energy[0 : num_crit - 1])
+        w_loc_max_reference = np.divide(
+            k_loc_max,
+            k_loc_max + reference_loc_peak - reference_energy[0 : num_crit - 1],
+        )
         w_reference = np.multiply(w_max_reference, w_loc_max_reference)
-        w_max_hypothesis = np.divide(k_max, k_max + db_max_hypothesis - hypothesis_energy[0: num_crit - 1])
-        w_loc_max_hypothesis = np.divide(k_loc_max,
-                                         k_loc_max + hypothesis_loc_peak - hypothesis_energy[0: num_crit - 1])
+        w_max_hypothesis = np.divide(k_max, k_max + db_max_hypothesis - hypothesis_energy[0 : num_crit - 1])
+        w_loc_max_hypothesis = np.divide(
+            k_loc_max,
+            k_loc_max + hypothesis_loc_peak - hypothesis_energy[0 : num_crit - 1],
+        )
         w_hypothesis = np.multiply(w_max_hypothesis, w_loc_max_hypothesis)
         w = np.divide(np.add(w_reference, w_hypothesis), 2.0)
-        slope_diff = np.subtract(reference_slope, hypothesis_slope)[0: num_crit - 1]
+        slope_diff = np.subtract(reference_slope, hypothesis_slope)[0 : num_crit - 1]
         distortion[frame_count] = np.dot(w, np.square(slope_diff)) / np.sum(w)
         # This normalization is not part of Klatt's paper, but helps to normalize the measure.
@@ -193,16 +249,16 @@ def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
     return distortion
-def _calc_log_likelihood_ratio_measure(hypothesis: np.ndarray,
-                                       reference: np.ndarray,
-                                       sample_rate: int = SAMPLE_RATE) -> np.ndarray:
+def _calc_log_likelihood_ratio_measure(
+    hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
+) -> np.ndarray:
     from scipy.linalg import toeplitz
     # The lengths of the reference and hypothesis must be the same.
     reference_length = np.size(reference)
     hypothesis_length = np.size(hypothesis)
     if reference_length != hypothesis_length:
-        raise ValueError('Hypothesis and reference must be the same length.')
+        raise ValueError("Hypothesis and reference must be the same length.")
     # window length in samples
     win_length = int(np.round(30 * sample_rate / 1000))
@@ -222,8 +278,8 @@ def _calc_log_likelihood_ratio_measure(hypothesis: np.ndarray,
     distortion = np.empty(num_frames)
     for frame_count in range(num_frames):
         # (1) Get the frames for the test and reference speech. Multiply by Hanning window.
-        reference_frame = reference[start: start + win_length]
-        hypothesis_frame = hypothesis[start: start + win_length]
+        reference_frame = reference[start : start + win_length]
+        hypothesis_frame = hypothesis[start : start + win_length]
         reference_frame = np.multiply(reference_frame, window)
         hypothesis_frame = np.multiply(hypothesis_frame, window)
@@ -239,16 +295,18 @@ def _calc_log_likelihood_ratio_measure(hypothesis: np.ndarray,
     return distortion
-def _calc_snr(hypothesis: np.ndarray,
-              reference: np.ndarray,
-              sample_rate: int = SAMPLE_RATE) -> tuple[float, np.ndarray]:
+def _calc_snr(
+    hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE
+) -> tuple[float, np.ndarray]:
     # The lengths of the reference and hypothesis must be the same.
     reference_length = len(reference)
     hypothesis_length = len(hypothesis)
     if reference_length != hypothesis_length:
-        raise ValueError('Hypothesis and reference must be the same length.')
+        raise ValueError("Hypothesis and reference must be the same length.")
-    overall_snr = 10 * np.log10(np.sum(np.square(reference)) / np.sum(np.square(reference - hypothesis)))
+    overall_snr = 10 * np.log10(
+        np.sum(np.square(reference)) / (np.sum(np.square(reference - hypothesis))) + np.finfo(np.float32).eps
+    )
     # window length in samples
     win_length = round(30 * sample_rate / 1000)
@@ -268,17 +326,17 @@ def _calc_snr(hypothesis: np.ndarray,
     eps = np.spacing(1)
     for frame_count in range(num_frames):
         # (1) Get the frames for the test and reference speech. Multiply by Hanning window.
-        reference_frame = reference[start:start + win_length]
-        hypothesis_frame = hypothesis[start:start + win_length]
+        reference_frame = reference[start : start + win_length]
+        hypothesis_frame = hypothesis[start : start + win_length]
         reference_frame = np.multiply(reference_frame, window)
         hypothesis_frame = np.multiply(hypothesis_frame, window)
         # (2) Compute the segmental SNR
         signal_energy = np.sum(np.square(reference_frame))
         noise_energy = np.sum(np.square(reference_frame - hypothesis_frame))
-        segmental_snr[frame_count] = np.clip(10 * np.log10(signal_energy / (noise_energy + eps) + eps),
-                                             min_snr,
-                                             max_snr)
+        segmental_snr[frame_count] = np.clip(
+            10 * np.log10(signal_energy / (noise_energy + eps) + eps), min_snr, max_snr
+        )
         start = start + skip_rate
@@ -291,7 +349,7 @@ def _lp_coefficients(speech_frame, model_order):
     autocorrelation = np.empty(model_order + 1)
     e = np.empty(model_order + 1)
     for k in range(model_order + 1):
-        autocorrelation[k] = np.dot(speech_frame[0:win_length - k], speech_frame[k: win_length])
+        autocorrelation[k] = np.dot(speech_frame[0 : win_length - k], speech_frame[k:win_length])
     # (2) Levinson-Durbin
     a = np.ones(model_order)
@@ -299,14 +357,14 @@ def _lp_coefficients(speech_frame, model_order):
     ref_coefficients = np.empty(model_order)
     e[0] = autocorrelation[0]
     for i in range(model_order):
-        a_past[0: i] = a[0: i]
-        sum_term = np.dot(a_past[0: i], autocorrelation[i:0:-1])
+        a_past[0:i] = a[0:i]
+        sum_term = np.dot(a_past[0:i], autocorrelation[i:0:-1])
         ref_coefficients[i] = (autocorrelation[i + 1] - sum_term) / e[i]
         a[i] = ref_coefficients[i]
         if i == 0:
-            a[0: i] = a_past[0: i] - np.multiply(a_past[i - 1:-1:-1], ref_coefficients[i])
+            a[0:i] = a_past[0:i] - np.multiply(a_past[i - 1 : -1 : -1], ref_coefficients[i])
         else:
-            a[0: i] = a_past[0: i] - np.multiply(a_past[i - 1::-1], ref_coefficients[i])
+            a[0:i] = a_past[0:i] - np.multiply(a_past[i - 1 :: -1], ref_coefficients[i])
         e[i + 1] = (1 - ref_coefficients[i] * ref_coefficients[i]) * e[i]
     lp_params = np.concatenate((np.array([1]), -a))
     return autocorrelation, ref_coefficients, lp_params

sonusai/metrics/calc_wer.py CHANGED Viewed

@@ -18,15 +18,17 @@ def calc_wer(hypothesis: list[str] | str, reference: list[str] | str) -> WerResu
     """
     import jiwer
-    transformation = jiwer.Compose([
-        jiwer.ToLowerCase(),
-        jiwer.RemovePunctuation(),
-        jiwer.RemoveWhiteSpace(replace_by_space=True),
-        jiwer.RemoveMultipleSpaces(),
-        jiwer.Strip(),
-        jiwer.RemoveEmptyStrings(),
-        jiwer.ReduceToListOfListOfWords(word_delimiter=' ')
-    ])
+    transformation = jiwer.Compose(
+        [
+            jiwer.ToLowerCase(),
+            jiwer.RemovePunctuation(),
+            jiwer.RemoveWhiteSpace(replace_by_space=True),
+            jiwer.RemoveMultipleSpaces(),
+            jiwer.Strip(),
+            jiwer.RemoveEmptyStrings(),
+            jiwer.ReduceToListOfListOfWords(word_delimiter=" "),
+        ]
+    )
     if isinstance(reference, str):
         reference = [reference]
@@ -34,35 +36,36 @@ def calc_wer(hypothesis: list[str] | str, reference: list[str] | str) -> WerResu
         hypothesis = [hypothesis]
     # jiwer does not allow empty string
-    measures = {'insertions':    0,
-                'substitutions': 0,
-                'deletions':     0,
-                'hits':          0}
+    measures = {"insertions": 0, "substitutions": 0, "deletions": 0, "hits": 0}
     if any(len(t) == 0 for t in reference):
         if any(len(t) != 0 for t in hypothesis):
-            measures['insertions'] = len(hypothesis)
+            measures["insertions"] = len(hypothesis)
     else:
-        measures = jiwer.compute_measures(truth=reference,
-                                          hypothesis=hypothesis,
-                                          truth_transform=transformation,
-                                          hypothesis_transform=transformation)
+        measures = jiwer.compute_measures(
+            truth=reference,
+            hypothesis=hypothesis,
+            truth_transform=transformation,
+            hypothesis_transform=transformation,
+        )
-    errors = measures['substitutions'] + measures['deletions'] + measures['insertions']
-    words = measures['hits'] + measures['substitutions'] + measures['deletions']
+    errors = measures["substitutions"] + measures["deletions"] + measures["insertions"]
+    words = measures["hits"] + measures["substitutions"] + measures["deletions"]
     if words != 0:
         wer = errors / words
-        substitutions_rate = measures['substitutions'] / words
-        deletions_rate = measures['deletions'] / words
-        insertions_rate = measures['insertions'] / words
+        substitutions_rate = measures["substitutions"] / words
+        deletions_rate = measures["deletions"] / words
+        insertions_rate = measures["insertions"] / words
     else:
-        wer = float('inf')
-        substitutions_rate = float('inf')
-        deletions_rate = float('inf')
-        insertions_rate = float('inf')
+        wer = float("inf")
+        substitutions_rate = float("inf")
+        deletions_rate = float("inf")
+        insertions_rate = float("inf")
-    return WerResult(wer=wer,
-                     words=int(words),
-                     substitutions=substitutions_rate,
-                     deletions=deletions_rate,
-                     insertions=insertions_rate)
+    return WerResult(
+        wer=wer,
+        words=int(words),
+        substitutions=substitutions_rate,
+        deletions=deletions_rate,
+        insertions=insertions_rate,
+    )

sonusai/metrics/calc_wsdr.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import numpy as np
-def calc_wsdr(hypothesis: np.ndarray,
-              reference: np.ndarray,
-              with_log: bool = False,
-              with_negate: bool = False) -> tuple[float, np.ndarray, np.ndarray]:
+def calc_wsdr(
+    hypothesis: np.ndarray,
+    reference: np.ndarray,
+    with_log: bool = False,
+    with_negate: bool = False,
+) -> tuple[float, np.ndarray, np.ndarray]:
     """Calculate weighted SDR (signal distortion ratio) using all source inputs of size [samples, nsrc].
        Uses true reference energy ratios to weight each cross-correlation coefficient cc = <y,yˆ>/∥y∥∥yˆ∥
        in a sum over all sources.
@@ -26,11 +28,12 @@ def calc_wsdr(hypothesis: np.ndarray,
     :return: (wsdr, ccoef, cweights)
     """
     nsrc = reference.shape[-1]
-    assert hypothesis.shape[-1] == nsrc
+    if hypothesis.shape[-1] != nsrc:
+        raise ValueError("hypothesis has wrong shape")
     # Calculate cc = <y,yˆ>/∥y∥∥yˆ∥ always in range -1 --> 1, size [1,nsrc]
-    ref_e = np.sum(reference ** 2, axis=0, keepdims=True)  # [1,nsrc]
-    hy_e = np.sum(hypothesis ** 2, axis=0, keepdims=True)
+    ref_e = np.sum(reference**2, axis=0, keepdims=True)  # [1,nsrc]
+    hy_e = np.sum(hypothesis**2, axis=0, keepdims=True)
     allref_e = np.sum(ref_e)
     cc = np.zeros(nsrc)  # calc correlation coefficient
     cw = np.zeros(nsrc)  # cc weights (energy ratio)

sonusai/metrics/class_summary.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# ruff: noqa: F821
 import numpy as np
 import pandas as pd
@@ -7,33 +8,35 @@ from sonusai.mixture import Predict
 from sonusai.mixture import Truth
-def class_summary(mixdb: MixtureDatabase,
-                  mixids: GeneralizedIDs,
-                  truth_f: Truth,
-                  predict: Predict,
-                  predict_thr: float | np.ndarray = 0,
-                  truth_thr: float = 0.5,
-                  timesteps: int = 0) -> pd.DataFrame:
-    """ Calculate table of metrics per class, and averages for a list
-        of mixtures using truth and prediction data [features, num_classes]
-        Example:
-        Generate multi-class metric summary into table, for example:
-                        PPV     TPR      F1     FPR     ACC   AP  AUC  Support
-            Class 1     0.71    0.80    0.75    0.00    0.99            44
-            Class 2     0.90    0.76    0.82    0.00    0.99            128
-            Class 3     0.86    0.82    0.84    0.04    0.93            789
-            Other       0.94    0.96    0.95    0.18    0.92            2807
+def class_summary(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs,
+    truth_f: Truth,
+    predict: Predict,
+    predict_thr: float | np.ndarray = 0,
+    truth_thr: float = 0.5,
+    timesteps: int = 0,
+) -> pd.DataFrame:
+    """Calculate table of metrics per class, and averages for a list
+    of mixtures using truth and prediction data [features, num_classes]
+    Example:
+    Generate multi-class metric summary into table, for example:
+                    PPV     TPR      F1     FPR     ACC   AP  AUC  Support
+        Class 1     0.71    0.80    0.75    0.00    0.99            44
+        Class 2     0.90    0.76    0.82    0.00    0.99            128
+        Class 3     0.86    0.82    0.84    0.04    0.93            789
+        Other       0.94    0.96    0.95    0.18    0.92            2807
-          micro-avg                     0.92    0.027                   3768
-          macro avg     0.85    0.83    0.84    0.05    0.96            3768
-          micro-avgwo
+      micro-avg                     0.92    0.027                   3768
+      macro avg     0.85    0.83    0.84    0.05    0.96            3768
+      micro-avgwo
     """
     from sonusai.metrics import one_hot
     num_classes = truth_f.shape[1]
     # TODO: re-work for modern mixdb API
-    y_truth_f, y_predict = get_mixids_data(mixdb, mixids, truth_f, predict)  # type: ignore
+    y_truth_f, y_predict = get_mixids_data(mixdb, mixids, truth_f, predict)  # type: ignore[name-defined]
     if not mixdb.truth_mutex and num_classes > 1:
         if not isinstance(predict_thr, np.ndarray):
@@ -49,25 +52,25 @@ def class_summary(mixdb: MixtureDatabase,
     # [ACC, TPR, PPV, TNR, FPR, HITFA, F1, MCC, NT, PT, TP, FP, AP, AUC]
     table_idx = np.array([2, 1, 6, 4, 0, 12, 13, 9])
-    col_n = ['PPV', 'TPR', 'F1', 'FPR', 'ACC', 'AP', 'AUC', 'Support']
+    col_n = ["PPV", "TPR", "F1", "FPR", "ACC", "AP", "AUC", "Support"]
     if mixdb.truth_mutex:
         if len(mixdb.class_labels) >= num_classes - 1:  # labels exist with or without Other
             row_n = mixdb.class_labels
             if len(mixdb.class_labels) == num_classes - 1:  # Other label does not exist, so add it
-                row_n.append('Other')
+                row_n.append("Other")
         else:
-            row_n = ([f'Class {i}' for i in range(1, num_classes)])
-            row_n.append('Other')
+            row_n = [f"Class {i}" for i in range(1, num_classes)]
+            row_n.append("Other")
     else:
         if len(mixdb.class_labels) == num_classes:
             row_n = mixdb.class_labels
         else:
-            row_n = ([f'Class {i}' for i in range(1, num_classes + 1)])
+            row_n = [f"Class {i}" for i in range(1, num_classes + 1)]
     df = pd.DataFrame(metrics[:, table_idx], columns=col_n, index=row_n)
     # [miPPV, miTPR, miF1, miFPR, miACC, miAP, miAUC, TPSUM]
-    avg_row_n = ['Macro-avg', 'Micro-avg', 'Weighted-avg']
+    avg_row_n = ["Macro-avg", "Micro-avg", "Weighted-avg"]
     dfavg = pd.DataFrame(metavg, columns=col_n, index=avg_row_n)
     # dfblank = pd.DataFrame([''])
@@ -75,6 +78,6 @@ def class_summary(mixdb: MixtureDatabase,
     classdf = pd.concat([df, dfavg])
     # classdf = classdf.round(2)
-    classdf['Support'] = classdf['Support'].astype(int)
+    classdf["Support"] = classdf["Support"].astype(int)
     return classdf

sonusai/metrics/confusion_matrix_summary.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# ruff: noqa: F821
 import numpy as np
 import pandas as pd
@@ -7,31 +8,33 @@ from sonusai.mixture import Predict
 from sonusai.mixture import Truth
-def confusion_matrix_summary(mixdb: MixtureDatabase,
-                             mixids: GeneralizedIDs,
-                             truth_f: Truth,
-                             predict: Predict,
-                             class_idx: int,
-                             predict_thr: float | np.ndarray = 0,
-                             truth_thr: float = 0.5,
-                             timesteps: int = 0) -> tuple[pd.DataFrame, pd.DataFrame]:
+def confusion_matrix_summary(
+    mixdb: MixtureDatabase,
+    mixids: GeneralizedIDs,
+    truth_f: Truth,
+    predict: Predict,
+    class_idx: int,
+    predict_thr: float | np.ndarray = 0,
+    truth_thr: float = 0.5,
+    timesteps: int = 0,
+) -> tuple[pd.DataFrame, pd.DataFrame]:
     """Calculate confusion matrix for specified class, using truth and prediction
-       data [features, num_classes].
+    data [features, num_classes].
-       predict_thr sets the decision threshold(s) applied to predict data, thus allowing
-       predict to be continuous probabilities.
+    predict_thr sets the decision threshold(s) applied to predict data, thus allowing
+    predict to be continuous probabilities.
-       Default predict_thr=0 will infer 0.5 for multi-label mode (truth_mutex = False), or
-       if single-label mode (truth_mutex == True) then ignore and use argmax mode, and
-       the confusion matrix is calculated for all classes.
+    Default predict_thr=0 will infer 0.5 for multi-label mode (truth_mutex = False), or
+    if single-label mode (truth_mutex == True) then ignore and use argmax mode, and
+    the confusion matrix is calculated for all classes.
-       Returns pandas dataframes of confusion matrix cmdf and normalized confusion matrix cmndf.
+    Returns pandas dataframes of confusion matrix cmdf and normalized confusion matrix cmndf.
     """
     from sonusai.metrics import one_hot
     num_classes = truth_f.shape[1]
     # TODO: re-work for modern mixdb API
-    ytrue, ypred = get_mixids_data(mixdb=mixdb, mixids=mixids, truth_f=truth_f, predict=predict)  # type: ignore
+    ytrue, ypred = get_mixids_data(mixdb=mixdb, mixids=mixids, truth_f=truth_f, predict=predict)  # type: ignore[name-defined]
     # Check predict_thr array or scalar and return final scalar predict_thr value
     if not mixdb.truth_mutex and num_classes > 1:
@@ -56,16 +59,16 @@ def confusion_matrix_summary(mixdb: MixtureDatabase,
     if len(mixdb.class_labels) == num_classes:
         class_names = mixdb.class_labels
     else:
-        class_names = ([f'Class {i}' for i in range(1, num_classes + 1)])
+        class_names = [f"Class {i}" for i in range(1, num_classes + 1)]
-    class_nums = ([f'{i}' for i in range(1, num_classes + 1)])
+    class_nums = [f"{i}" for i in range(1, num_classes + 1)]
     if mixdb.truth_mutex:
         # single-label mode force to argmax mode
         predict_thr = np.array(0, dtype=np.float32)
         _, _, cm, cmn, _, _ = one_hot(ytrue, ypred, predict_thr, truth_thr, timesteps)
         row_n = class_names
-        row_n[-1] = 'Other'
+        row_n[-1] = "Other"
         # mux = pd.MultiIndex.from_product([['Single-label/mutex mode, truth thr = {}'.format(truth_thr)],
         #                                   class_nums])
         # mux = pd.MultiIndex.from_product([['truth thr = {}'.format(truth_thr)], class_nums])
@@ -76,12 +79,12 @@ def confusion_matrix_summary(mixdb: MixtureDatabase,
     else:
         _, _, cm, cmn, _, _ = one_hot(ytrue[:, class_idx], ypred[:, class_idx], predict_thr, truth_thr, timesteps)
         cname = class_names[class_idx]
-        row_n = ['TrueN', 'TrueP']
-        col_n = ['N-' + cname, 'P-' + cname]
+        row_n = ["TrueN", "TrueP"]
+        col_n = ["N-" + cname, "P-" + cname]
         cmdf = pd.DataFrame(cm, index=row_n, columns=col_n, dtype=np.int32)
         cmndf = pd.DataFrame(cmn, index=row_n, columns=col_n, dtype=np.float32)
         # add thresholds in 3rd row
-        pdnote = pd.DataFrame(np.atleast_2d([predict_thr, truth_thr]), index=['p/t thr:'], columns=col_n)
+        pdnote = pd.DataFrame(np.atleast_2d([predict_thr, truth_thr]), index=["p/t thr:"], columns=col_n)
         cmdf = pd.concat([cmdf, pdnote])
         cmndf = pd.concat([cmndf, pdnote])

sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl

sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl