sonusai 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -27,9 +27,6 @@ def get_feature_from_audio(audio: AudioT,
27
27
  num_classes=num_classes,
28
28
  truth_mutex=truth_mutex)
29
29
 
30
- feature_step_samples = fg.ftransform_R * fg.decimation * fg.step
31
- audio = pad_audio_to_frame(audio, feature_step_samples)
32
-
33
30
  audio_f = forward_transform(audio=audio,
34
31
  config=TransformConfig(N=fg.ftransform_N,
35
32
  R=fg.ftransform_R,
@@ -37,10 +34,8 @@ def get_feature_from_audio(audio: AudioT,
37
34
  bin_end=fg.bin_end,
38
35
  ttype=fg.ftransform_ttype))
39
36
 
40
- samples = len(audio)
41
- transform_frames = samples // fg.ftransform_R
42
- feature_frames = samples // feature_step_samples
43
-
37
+ transform_frames = audio_f.shape[0]
38
+ feature_frames = transform_frames // (fg.decimation * fg.step)
44
39
  feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
45
40
 
46
41
  feature_frame = 0
@@ -60,7 +55,7 @@ def get_audio_from_feature(feature: Feature,
60
55
  truth_mutex: Optional[bool] = False) -> AudioT:
61
56
  """Apply inverse transform to feature data to generate audio data
62
57
 
63
- :param feature: Feature data [frames, strides, feature_parameters]
58
+ :param feature: Feature data [frames, stride=1, feature_parameters]
64
59
  :param feature_mode: Feature mode
65
60
  :param num_classes: Number of classes
66
61
  :param truth_mutex: Whether to calculate 'other' label
@@ -70,16 +65,23 @@ def get_audio_from_feature(feature: Feature,
70
65
 
71
66
  from pyaaware import FeatureGenerator
72
67
 
68
+ from sonusai import SonusAIError
73
69
  from .datatypes import TransformConfig
74
70
  from .helpers import inverse_transform
75
71
  from sonusai.utils.stacked_complex import unstack_complex
76
72
  from sonusai.utils.compress import power_uncompress
77
73
 
74
+ if feature.ndim != 3:
75
+ raise SonusAIError('feature must have 3 dimensions: [frames, stride=1, feature_parameters]')
76
+
77
+ if feature.shape[1] != 1:
78
+ raise SonusAIError('Strided feature data is not supported for audio extraction; stride must be 1.')
79
+
78
80
  fg = FeatureGenerator(feature_mode=feature_mode,
79
81
  num_classes=num_classes,
80
82
  truth_mutex=truth_mutex)
81
83
 
82
- feature_complex = unstack_complex(feature)
84
+ feature_complex = unstack_complex(feature.squeeze())
83
85
  if feature_mode[0:1] == 'h':
84
86
  feature_complex = power_uncompress(feature_complex)
85
87
  return np.squeeze(inverse_transform(transform=feature_complex,
sonusai/mixture/mixdb.py CHANGED
@@ -218,8 +218,10 @@ class MixtureDatabase:
218
218
  f'Target[0] ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
219
219
  metrics.append(MetricDoc('Mixture Metrics', f'mxasr.{name}',
220
220
  f'ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
221
+ metrics.append(MetricDoc('Target Metrics', f'basewer.{name}',
222
+ f'Word error rate of tasr.{name} vs. speech text metadata for the target'))
221
223
  metrics.append(MetricDoc('Mixture Metrics', f'mxwer.{name}',
222
- f'Word error rate using {name} ASR as defined in mixdb asr_configs parameter'))
224
+ f'Word error rate of mxasr.{name} vs. tasr.{name}'))
223
225
 
224
226
  return metrics
225
227
 
@@ -1405,6 +1407,16 @@ class MixtureDatabase:
1405
1407
  # TODO: should this be NaN like above?
1406
1408
  return float(0)
1407
1409
 
1410
+ if m.startswith('basewer'):
1411
+ asr_name = get_asr_name(m)
1412
+
1413
+ text = self.mixture_speech_metadata(m_id, 'text')[0]
1414
+ if text is not None:
1415
+ return calc_wer(target_asr(asr_name), text).wer * 100
1416
+
1417
+ # TODO: should this be NaN like above?
1418
+ return float(0)
1419
+
1408
1420
  if m.startswith('mxasr'):
1409
1421
  return mixture_asr(get_asr_name(m))
1410
1422
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.18.8
3
+ Version: 0.18.9
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -42,11 +42,11 @@ sonusai/mixture/constants.py,sha256=90qaRIEcmIoS3Od5h_UP0_SkkvG2aE_eYPv6WsIktC0,
42
42
  sonusai/mixture/datatypes.py,sha256=2vegllgZcmFLq5NjqS7Lo97dOpOJOAj0Eml4ggP_tGo,10966
43
43
  sonusai/mixture/db_datatypes.py,sha256=GDYbcSrlgUJsesiUUNnR4s5aBkMgviiNSQDaBcgYX7I,1428
44
44
  sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
45
- sonusai/mixture/feature.py,sha256=bHAPRaYGyS-ZTOb-RLCwDau7n1NDKsVEW30Gd9SRZYo,3676
45
+ sonusai/mixture/feature.py,sha256=kYomwZpuvPQAZdb2MCaJBD8UD5LD2w5jTIkkRldaFlM,3839
46
46
  sonusai/mixture/generation.py,sha256=W3n6ipI-dxg4Wj6YBJn8RTpFqkAyIXzxwObeFbSLq08,42801
47
47
  sonusai/mixture/helpers.py,sha256=9x7gezEqPm5xKGAbwCqDMjedVEmoDWyFR_5-T_5nlno,24740
48
48
  sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
49
- sonusai/mixture/mixdb.py,sha256=mr9Ck4p_mCfvz1PXoUgjWcw9F-Rlw3uGiDizUvPqo2A,64359
49
+ sonusai/mixture/mixdb.py,sha256=EoH-kwg-zVJLAqpxbRKV7TtCxPqiBo3rIfdvCeZhEyI,64872
50
50
  sonusai/mixture/soundfile_audio.py,sha256=BwO4lftNvrhoPTJERONcrpxSpM2fjO6kL_e5Ylz742A,4220
51
51
  sonusai/mixture/sox_audio.py,sha256=DbHuyLtEuQYtKsIRxx6g1webW_LsdgLz52P5VO37MqI,17119
52
52
  sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
@@ -119,7 +119,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2z
119
119
  sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
120
120
  sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
121
121
  sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
122
- sonusai-0.18.8.dist-info/METADATA,sha256=KqBUJv7yMq-3lDPNfezRqi_z28ZB0w0mDEXJfBtlVmA,2591
123
- sonusai-0.18.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
124
- sonusai-0.18.8.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
125
- sonusai-0.18.8.dist-info/RECORD,,
122
+ sonusai-0.18.9.dist-info/METADATA,sha256=GdYfD7ldc9oJoMQxNgpG8Vs-RFOmP597X306RuMGi_M,2591
123
+ sonusai-0.18.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
124
+ sonusai-0.18.9.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
125
+ sonusai-0.18.9.dist-info/RECORD,,