sonusai 0.18.8__py3-none-any.whl → 0.18.9__py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- sonusai/mixture/feature.py +11 -9
- sonusai/mixture/mixdb.py +13 -1
- {sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/METADATA +1 -1
- {sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/RECORD +6 -6
- {sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/WHEEL +0 -0
- {sonusai-0.18.8.dist-info → sonusai-0.18.9.dist-info}/entry_points.txt +0 -0
sonusai/mixture/feature.py
CHANGED
@@ -27,9 +27,6 @@ def get_feature_from_audio(audio: AudioT,
|
|
27
27
|
num_classes=num_classes,
|
28
28
|
truth_mutex=truth_mutex)
|
29
29
|
|
30
|
-
feature_step_samples = fg.ftransform_R * fg.decimation * fg.step
|
31
|
-
audio = pad_audio_to_frame(audio, feature_step_samples)
|
32
|
-
|
33
30
|
audio_f = forward_transform(audio=audio,
|
34
31
|
config=TransformConfig(N=fg.ftransform_N,
|
35
32
|
R=fg.ftransform_R,
|
@@ -37,10 +34,8 @@ def get_feature_from_audio(audio: AudioT,
|
|
37
34
|
bin_end=fg.bin_end,
|
38
35
|
ttype=fg.ftransform_ttype))
|
39
36
|
|
40
|
-
|
41
|
-
|
42
|
-
feature_frames = samples // feature_step_samples
|
43
|
-
|
37
|
+
transform_frames = audio_f.shape[0]
|
38
|
+
feature_frames = transform_frames // (fg.decimation * fg.step)
|
44
39
|
feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
|
45
40
|
|
46
41
|
feature_frame = 0
|
@@ -60,7 +55,7 @@ def get_audio_from_feature(feature: Feature,
|
|
60
55
|
truth_mutex: Optional[bool] = False) -> AudioT:
|
61
56
|
"""Apply inverse transform to feature data to generate audio data
|
62
57
|
|
63
|
-
:param feature: Feature data [frames,
|
58
|
+
:param feature: Feature data [frames, stride=1, feature_parameters]
|
64
59
|
:param feature_mode: Feature mode
|
65
60
|
:param num_classes: Number of classes
|
66
61
|
:param truth_mutex: Whether to calculate 'other' label
|
@@ -70,16 +65,23 @@ def get_audio_from_feature(feature: Feature,
|
|
70
65
|
|
71
66
|
from pyaaware import FeatureGenerator
|
72
67
|
|
68
|
+
from sonusai import SonusAIError
|
73
69
|
from .datatypes import TransformConfig
|
74
70
|
from .helpers import inverse_transform
|
75
71
|
from sonusai.utils.stacked_complex import unstack_complex
|
76
72
|
from sonusai.utils.compress import power_uncompress
|
77
73
|
|
74
|
+
if feature.ndim != 3:
|
75
|
+
raise SonusAIError('feature must have 3 dimensions: [frames, stride=1, feature_parameters]')
|
76
|
+
|
77
|
+
if feature.shape[1] != 1:
|
78
|
+
raise SonusAIError('Strided feature data is not supported for audio extraction; stride must be 1.')
|
79
|
+
|
78
80
|
fg = FeatureGenerator(feature_mode=feature_mode,
|
79
81
|
num_classes=num_classes,
|
80
82
|
truth_mutex=truth_mutex)
|
81
83
|
|
82
|
-
feature_complex = unstack_complex(feature)
|
84
|
+
feature_complex = unstack_complex(feature.squeeze())
|
83
85
|
if feature_mode[0:1] == 'h':
|
84
86
|
feature_complex = power_uncompress(feature_complex)
|
85
87
|
return np.squeeze(inverse_transform(transform=feature_complex,
|
sonusai/mixture/mixdb.py
CHANGED
@@ -218,8 +218,10 @@ class MixtureDatabase:
|
|
218
218
|
f'Target[0] ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
|
219
219
|
metrics.append(MetricDoc('Mixture Metrics', f'mxasr.{name}',
|
220
220
|
f'ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
|
221
|
+
metrics.append(MetricDoc('Target Metrics', f'basewer.{name}',
|
222
|
+
f'Word error rate of tasr.{name} vs. speech text metadata for the target'))
|
221
223
|
metrics.append(MetricDoc('Mixture Metrics', f'mxwer.{name}',
|
222
|
-
f'Word error rate
|
224
|
+
f'Word error rate of mxasr.{name} vs. tasr.{name}'))
|
223
225
|
|
224
226
|
return metrics
|
225
227
|
|
@@ -1405,6 +1407,16 @@ class MixtureDatabase:
|
|
1405
1407
|
# TODO: should this be NaN like above?
|
1406
1408
|
return float(0)
|
1407
1409
|
|
1410
|
+
if m.startswith('basewer'):
|
1411
|
+
asr_name = get_asr_name(m)
|
1412
|
+
|
1413
|
+
text = self.mixture_speech_metadata(m_id, 'text')[0]
|
1414
|
+
if text is not None:
|
1415
|
+
return calc_wer(target_asr(asr_name), text).wer * 100
|
1416
|
+
|
1417
|
+
# TODO: should this be NaN like above?
|
1418
|
+
return float(0)
|
1419
|
+
|
1408
1420
|
if m.startswith('mxasr'):
|
1409
1421
|
return mixture_asr(get_asr_name(m))
|
1410
1422
|
|
@@ -42,11 +42,11 @@ sonusai/mixture/constants.py,sha256=90qaRIEcmIoS3Od5h_UP0_SkkvG2aE_eYPv6WsIktC0,
|
|
42
42
|
sonusai/mixture/datatypes.py,sha256=2vegllgZcmFLq5NjqS7Lo97dOpOJOAj0Eml4ggP_tGo,10966
|
43
43
|
sonusai/mixture/db_datatypes.py,sha256=GDYbcSrlgUJsesiUUNnR4s5aBkMgviiNSQDaBcgYX7I,1428
|
44
44
|
sonusai/mixture/eq_rule_is_valid.py,sha256=MpQwRA5M76wSiQWEI1lW2cLFdPaMttBLcQp3tWD8efM,1243
|
45
|
-
sonusai/mixture/feature.py,sha256=
|
45
|
+
sonusai/mixture/feature.py,sha256=kYomwZpuvPQAZdb2MCaJBD8UD5LD2w5jTIkkRldaFlM,3839
|
46
46
|
sonusai/mixture/generation.py,sha256=W3n6ipI-dxg4Wj6YBJn8RTpFqkAyIXzxwObeFbSLq08,42801
|
47
47
|
sonusai/mixture/helpers.py,sha256=9x7gezEqPm5xKGAbwCqDMjedVEmoDWyFR_5-T_5nlno,24740
|
48
48
|
sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
|
49
|
-
sonusai/mixture/mixdb.py,sha256=
|
49
|
+
sonusai/mixture/mixdb.py,sha256=EoH-kwg-zVJLAqpxbRKV7TtCxPqiBo3rIfdvCeZhEyI,64872
|
50
50
|
sonusai/mixture/soundfile_audio.py,sha256=BwO4lftNvrhoPTJERONcrpxSpM2fjO6kL_e5Ylz742A,4220
|
51
51
|
sonusai/mixture/sox_audio.py,sha256=DbHuyLtEuQYtKsIRxx6g1webW_LsdgLz52P5VO37MqI,17119
|
52
52
|
sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
|
@@ -119,7 +119,7 @@ sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2z
|
|
119
119
|
sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
|
120
120
|
sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
|
121
121
|
sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
|
122
|
-
sonusai-0.18.
|
123
|
-
sonusai-0.18.
|
124
|
-
sonusai-0.18.
|
125
|
-
sonusai-0.18.
|
122
|
+
sonusai-0.18.9.dist-info/METADATA,sha256=GdYfD7ldc9oJoMQxNgpG8Vs-RFOmP597X306RuMGi_M,2591
|
123
|
+
sonusai-0.18.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
124
|
+
sonusai-0.18.9.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
|
125
|
+
sonusai-0.18.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|