PyPI - sonusai - Versions diffs - 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl - Mend

sonusai 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

sonusai/mixture/feature.py CHANGED Viewed

@@ -27,9 +27,6 @@ def get_feature_from_audio(audio: AudioT,
                           num_classes=num_classes,
                           truth_mutex=truth_mutex)
-    feature_step_samples = fg.ftransform_R * fg.decimation * fg.step
-    audio = pad_audio_to_frame(audio, feature_step_samples)
     audio_f = forward_transform(audio=audio,
                                 config=TransformConfig(N=fg.ftransform_N,
                                                        R=fg.ftransform_R,
@@ -37,10 +34,8 @@ def get_feature_from_audio(audio: AudioT,
                                                        bin_end=fg.bin_end,
                                                        ttype=fg.ftransform_ttype))
-    samples = len(audio)
-    transform_frames = samples // fg.ftransform_R
-    feature_frames = samples // feature_step_samples
+    transform_frames = audio_f.shape[0]
+    feature_frames = transform_frames // (fg.decimation * fg.step)
     feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
     feature_frame = 0
@@ -60,7 +55,7 @@ def get_audio_from_feature(feature: Feature,
                            truth_mutex: Optional[bool] = False) -> AudioT:
     """Apply inverse transform to feature data to generate audio data
-    :param feature: Feature data [frames, strides, feature_parameters]
+    :param feature: Feature data [frames, stride=1, feature_parameters]
     :param feature_mode: Feature mode
     :param num_classes: Number of classes
     :param truth_mutex: Whether to calculate 'other' label
@@ -70,16 +65,23 @@ def get_audio_from_feature(feature: Feature,
     from pyaaware import FeatureGenerator
+    from sonusai import SonusAIError
     from .datatypes import TransformConfig
     from .helpers import inverse_transform
     from sonusai.utils.stacked_complex import unstack_complex
     from sonusai.utils.compress import power_uncompress
+    if feature.ndim != 3:
+        raise SonusAIError('feature must have 3 dimensions: [frames, stride=1, feature_parameters]')
+    if feature.shape[1] != 1:
+        raise SonusAIError('Strided feature data is not supported for audio extraction; stride must be 1.')
     fg = FeatureGenerator(feature_mode=feature_mode,
                           num_classes=num_classes,
                           truth_mutex=truth_mutex)
-    feature_complex = unstack_complex(feature)
+    feature_complex = unstack_complex(feature.squeeze())
     if feature_mode[0:1] == 'h':
         feature_complex = power_uncompress(feature_complex)
     return np.squeeze(inverse_transform(transform=feature_complex,

sonusai/mixture/mixdb.py CHANGED Viewed

@@ -218,8 +218,10 @@ class MixtureDatabase:
                                      f'Target[0] ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
             metrics.append(MetricDoc('Mixture Metrics', f'mxasr.{name}',
                                      f'ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
+            metrics.append(MetricDoc('Target Metrics', f'basewer.{name}',
+                                     f'Word error rate of tasr.{name} vs. speech text metadata for the target'))
             metrics.append(MetricDoc('Mixture Metrics', f'mxwer.{name}',
-                                     f'Word error rate using {name} ASR as defined in mixdb asr_configs parameter'))
+                                     f'Word error rate of mxasr.{name} vs. tasr.{name}'))
         return metrics
@@ -1405,6 +1407,16 @@ class MixtureDatabase:
                 # TODO: should this be NaN like above?
                 return float(0)
+            if m.startswith('basewer'):
+                asr_name = get_asr_name(m)
+                text = self.mixture_speech_metadata(m_id, 'text')[0]
+                if text is not None:
+                    return calc_wer(target_asr(asr_name), text).wer * 100
+                # TODO: should this be NaN like above?
+                return float(0)
             if m.startswith('mxasr'):
                 return mixture_asr(get_asr_name(m))

{sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.18.8
+Version: 0.18.9
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only

{sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/RECORD RENAMED Viewed

@@ -42,11 +42,11 @@ sonusai/mixture/constants.py,sha256=90qaRIEcmIoS3Od5h_UP0_SkkvG2aE_eYPv6WsIktC0,
 sonusai/mixture/datatypes.py,sha256=2vegllgZcmFLq5NjqS7Lo97dOpOJOAj0Eml4ggP_tGo,10966
 sonusai/mixture/db_datatypes.py,sha256=GDYbcSrlgUJsesiUUNnR4s5aBkMgviiNSQDaBcgYX7I,1428
 sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
-sonusai/mixture/feature.py,sha256=bHAPRaYGyS-ZTOb-RLCwDau7n1NDKsVEW30Gd9SRZYo,3676
+sonusai/mixture/feature.py,sha256=kYomwZpuvPQAZdb2MCaJBD8UD5LD2w5jTIkkRldaFlM,3839
 sonusai/mixture/generation.py,sha256=W3n6ipI-dxg4Wj6YBJn8RTpFqkAyIXzxwObeFbSLq08,42801
 sonusai/mixture/helpers.py,sha256=9x7gezEqPm5xKGAbwCqDMjedVEmoDWyFR_5-T_5nlno,24740
 sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
-sonusai/mixture/mixdb.py,sha256=mr9Ck4p_mCfvz1PXoUgjWcw9F-Rlw3uGiDizUvPqo2A,64359
+sonusai/mixture/mixdb.py,sha256=EoH-kwg-zVJLAqpxbRKV7TtCxPqiBo3rIfdvCeZhEyI,64872
 sonusai/mixture/soundfile_audio.py,sha256=BwO4lftNvrhoPTJERONcrpxSpM2fjO6kL_e5Ylz742A,4220
 sonusai/mixture/sox_audio.py,sha256=DbHuyLtEuQYtKsIRxx6g1webW_LsdgLz52P5VO37MqI,17119
 sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
@@ -119,7 +119,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2z
 sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
 sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
 sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
-sonusai-0.18.8.dist-info/METADATA,sha256=KqBUJv7yMq-3lDPNfezRqi_z28ZB0w0mDEXJfBtlVmA,2591
-sonusai-0.18.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-sonusai-0.18.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
-sonusai-0.18.8.dist-info/RECORD,,
+sonusai-0.18.9.dist-info/METADATA,sha256=GdYfD7ldc9oJoMQxNgpG8Vs-RFOmP597X306RuMGi_M,2591
+sonusai-0.18.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+sonusai-0.18.9.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
+sonusai-0.18.9.dist-info/RECORD,,

{sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

sonusai 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl

sonusai 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl