sonusai 0.18.7__tar.gz → 0.18.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. {sonusai-0.18.7 → sonusai-0.18.9}/PKG-INFO +1 -1
  2. {sonusai-0.18.7 → sonusai-0.18.9}/pyproject.toml +1 -1
  3. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/__init__.py +0 -1
  4. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/feature.py +11 -9
  5. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/helpers.py +4 -2
  6. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/mixdb.py +77 -19
  7. sonusai-0.18.7/sonusai/post_spenh_targetf.py +0 -160
  8. {sonusai-0.18.7 → sonusai-0.18.9}/README.rst +0 -0
  9. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/aawscd_probwrite.py +0 -0
  10. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/audiofe.py +0 -0
  11. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/calc_metric_spenh.py +0 -0
  12. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/data/__init__.py +0 -0
  13. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/data/genmixdb.yml +0 -0
  14. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/data/speech_ma01_01.wav +0 -0
  15. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/data/whitenoise.wav +0 -0
  16. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/doc/__init__.py +0 -0
  17. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/doc/doc.py +0 -0
  18. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/doc.py +0 -0
  19. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/genft.py +0 -0
  20. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/genmetrics.py +0 -0
  21. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/genmix.py +0 -0
  22. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/genmixdb.py +0 -0
  23. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/gentcst.py +0 -0
  24. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/lsdb.py +0 -0
  25. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/main.py +0 -0
  26. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/__init__.py +0 -0
  27. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_audio_stats.py +0 -0
  28. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_class_weights.py +0 -0
  29. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
  30. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_pcm.py +0 -0
  31. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_pesq.py +0 -0
  32. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_phase_distance.py +0 -0
  33. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_sa_sdr.py +0 -0
  34. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_sample_weights.py +0 -0
  35. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_segsnr_f.py +0 -0
  36. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_speech.py +0 -0
  37. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_wer.py +0 -0
  38. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/calc_wsdr.py +0 -0
  39. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/class_summary.py +0 -0
  40. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/confusion_matrix_summary.py +0 -0
  41. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/one_hot.py +0 -0
  42. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/metrics/snr_summary.py +0 -0
  43. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/__init__.py +0 -0
  44. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/audio.py +0 -0
  45. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/augmentation.py +0 -0
  46. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/class_count.py +0 -0
  47. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/config.py +0 -0
  48. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/constants.py +0 -0
  49. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/datatypes.py +0 -0
  50. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/db_datatypes.py +0 -0
  51. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/eq_rule_is_valid.py +0 -0
  52. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/generation.py +0 -0
  53. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/log_duration_and_sizes.py +0 -0
  54. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/soundfile_audio.py +0 -0
  55. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/sox_audio.py +0 -0
  56. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/sox_augmentation.py +0 -0
  57. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/spectral_mask.py +0 -0
  58. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/target_class_balancing.py +0 -0
  59. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/targets.py +0 -0
  60. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/tokenized_shell_vars.py +0 -0
  61. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/torchaudio_audio.py +0 -0
  62. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/torchaudio_augmentation.py +0 -0
  63. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth.py +0 -0
  64. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/__init__.py +0 -0
  65. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/crm.py +0 -0
  66. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/data.py +0 -0
  67. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/energy.py +0 -0
  68. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/file.py +0 -0
  69. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/phoneme.py +0 -0
  70. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/sed.py +0 -0
  71. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mixture/truth_functions/target.py +0 -0
  72. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/mkwav.py +0 -0
  73. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/onnx_predict.py +0 -0
  74. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/plot.py +0 -0
  75. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/queries/__init__.py +0 -0
  76. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/queries/queries.py +0 -0
  77. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/__init__.py +0 -0
  78. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/l2arctic.py +0 -0
  79. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/librispeech.py +0 -0
  80. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/mcgill.py +0 -0
  81. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/textgrid.py +0 -0
  82. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/timit.py +0 -0
  83. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/types.py +0 -0
  84. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/vctk.py +0 -0
  85. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/speech/voxceleb.py +0 -0
  86. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/summarize_metric_spenh.py +0 -0
  87. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/tplot.py +0 -0
  88. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/__init__.py +0 -0
  89. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/asl_p56.py +0 -0
  90. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/asr.py +0 -0
  91. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/asr_functions/__init__.py +0 -0
  92. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/asr_functions/aaware_whisper.py +0 -0
  93. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/audio_devices.py +0 -0
  94. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/braced_glob.py +0 -0
  95. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/calculate_input_shape.py +0 -0
  96. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/compress.py +0 -0
  97. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/convert_string_to_number.py +0 -0
  98. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/create_timestamp.py +0 -0
  99. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/create_ts_name.py +0 -0
  100. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/dataclass_from_dict.py +0 -0
  101. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/db.py +0 -0
  102. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/docstring.py +0 -0
  103. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/energy_f.py +0 -0
  104. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/engineering_number.py +0 -0
  105. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/get_frames_per_batch.py +0 -0
  106. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/get_label_names.py +0 -0
  107. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/grouper.py +0 -0
  108. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/human_readable_size.py +0 -0
  109. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/max_text_width.py +0 -0
  110. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/model_utils.py +0 -0
  111. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/numeric_conversion.py +0 -0
  112. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/onnx_utils.py +0 -0
  113. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/parallel.py +0 -0
  114. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/path_info.py +0 -0
  115. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/print_mixture_details.py +0 -0
  116. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/ranges.py +0 -0
  117. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/read_mixture_data.py +0 -0
  118. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/read_predict_data.py +0 -0
  119. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/reshape.py +0 -0
  120. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/seconds_to_hms.py +0 -0
  121. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/stacked_complex.py +0 -0
  122. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/stratified_shuffle_split.py +0 -0
  123. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/write_audio.py +0 -0
  124. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/utils/yes_or_no.py +0 -0
  125. {sonusai-0.18.7 → sonusai-0.18.9}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.18.7
3
+ Version: 0.18.9
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sonusai"
3
- version = "0.18.7"
3
+ version = "0.18.9"
4
4
  description = "Framework for building deep neural network models for sound, speech, and voice AI"
5
5
  authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
6
6
  maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -24,7 +24,6 @@ commands_doc = """
24
24
  mkwav Make WAV files from a mixture database
25
25
  onnx_predict Run ONNX predict on a trained model
26
26
  plot Plot mixture data
27
- post_spenh_targetf Run post-processing for speech enhancement targetf data
28
27
  summarize_metric_spenh Summarize speech enhancement and analysis results
29
28
  tplot Plot truth data
30
29
  vars List custom SonusAI variables
@@ -27,9 +27,6 @@ def get_feature_from_audio(audio: AudioT,
27
27
  num_classes=num_classes,
28
28
  truth_mutex=truth_mutex)
29
29
 
30
- feature_step_samples = fg.ftransform_R * fg.decimation * fg.step
31
- audio = pad_audio_to_frame(audio, feature_step_samples)
32
-
33
30
  audio_f = forward_transform(audio=audio,
34
31
  config=TransformConfig(N=fg.ftransform_N,
35
32
  R=fg.ftransform_R,
@@ -37,10 +34,8 @@ def get_feature_from_audio(audio: AudioT,
37
34
  bin_end=fg.bin_end,
38
35
  ttype=fg.ftransform_ttype))
39
36
 
40
- samples = len(audio)
41
- transform_frames = samples // fg.ftransform_R
42
- feature_frames = samples // feature_step_samples
43
-
37
+ transform_frames = audio_f.shape[0]
38
+ feature_frames = transform_frames // (fg.decimation * fg.step)
44
39
  feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
45
40
 
46
41
  feature_frame = 0
@@ -60,7 +55,7 @@ def get_audio_from_feature(feature: Feature,
60
55
  truth_mutex: Optional[bool] = False) -> AudioT:
61
56
  """Apply inverse transform to feature data to generate audio data
62
57
 
63
- :param feature: Feature data [frames, strides, feature_parameters]
58
+ :param feature: Feature data [frames, stride=1, feature_parameters]
64
59
  :param feature_mode: Feature mode
65
60
  :param num_classes: Number of classes
66
61
  :param truth_mutex: Whether to calculate 'other' label
@@ -70,16 +65,23 @@ def get_audio_from_feature(feature: Feature,
70
65
 
71
66
  from pyaaware import FeatureGenerator
72
67
 
68
+ from sonusai import SonusAIError
73
69
  from .datatypes import TransformConfig
74
70
  from .helpers import inverse_transform
75
71
  from sonusai.utils.stacked_complex import unstack_complex
76
72
  from sonusai.utils.compress import power_uncompress
77
73
 
74
+ if feature.ndim != 3:
75
+ raise SonusAIError('feature must have 3 dimensions: [frames, stride=1, feature_parameters]')
76
+
77
+ if feature.shape[1] != 1:
78
+ raise SonusAIError('Strided feature data is not supported for audio extraction; stride must be 1.')
79
+
78
80
  fg = FeatureGenerator(feature_mode=feature_mode,
79
81
  num_classes=num_classes,
80
82
  truth_mutex=truth_mutex)
81
83
 
82
- feature_complex = unstack_complex(feature)
84
+ feature_complex = unstack_complex(feature.squeeze())
83
85
  if feature_mode[0:1] == 'h':
84
86
  feature_complex = power_uncompress(feature_complex)
85
87
  return np.squeeze(inverse_transform(transform=feature_complex,
@@ -276,7 +276,6 @@ def read_mixture_data(name: str, items: list[str] | str) -> Any:
276
276
  :return: Data (or tuple of data)
277
277
  """
278
278
  from os.path import exists
279
- from typing import Any
280
279
 
281
280
  import h5py
282
281
  import numpy as np
@@ -287,7 +286,10 @@ def read_mixture_data(name: str, items: list[str] | str) -> Any:
287
286
  if d_name in file:
288
287
  data = np.array(file[d_name])
289
288
  if data.size == 1:
290
- return data.item()
289
+ item = data.item()
290
+ if isinstance(item, bytes):
291
+ return item.decode('utf-8')
292
+ return item
291
293
  return data
292
294
  return None
293
295
 
@@ -214,8 +214,14 @@ class MixtureDatabase:
214
214
  MetricDoc('Truth Metrics', 'sedtopn', '(not implemented) N most active by largest sedavg [N, 1]'),
215
215
  ])
216
216
  for name in self.asr_configs:
217
+ metrics.append(MetricDoc('Target Metrics', f'tasr.{name}',
218
+ f'Target[0] ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
219
+ metrics.append(MetricDoc('Mixture Metrics', f'mxasr.{name}',
220
+ f'ASR text using {name} ASR as defined in mixdb asr_configs parameter'))
221
+ metrics.append(MetricDoc('Target Metrics', f'basewer.{name}',
222
+ f'Word error rate of tasr.{name} vs. speech text metadata for the target'))
217
223
  metrics.append(MetricDoc('Mixture Metrics', f'mxwer.{name}',
218
- f'Word error rate using {name} ASR as defined in mixdb asr_configs parameter'))
224
+ f'Word error rate of mxasr.{name} vs. tasr.{name}'))
219
225
 
220
226
  return metrics
221
227
 
@@ -1185,7 +1191,7 @@ class MixtureDatabase:
1185
1191
 
1186
1192
  def mixture_metrics(self, m_id: int,
1187
1193
  metrics: list[str],
1188
- force: bool = False) -> list[float | int | Segsnr]:
1194
+ force: bool = False) -> list[float | int | str | Segsnr]:
1189
1195
  """Get metrics data for the given mixture ID
1190
1196
 
1191
1197
  :param m_id: Zero-based mixture ID
@@ -1328,7 +1334,56 @@ class MixtureDatabase:
1328
1334
 
1329
1335
  noise_stats = create_noise_stats()
1330
1336
 
1331
- def calc(m: str) -> float | int | Segsnr:
1337
+ def create_asr_config() -> Callable[[str], dict]:
1338
+ state: dict[str, dict] = {}
1339
+
1340
+ def get(asr_name) -> dict:
1341
+ nonlocal state
1342
+ if asr_name not in state:
1343
+ state[asr_name] = self.asr_configs.get(asr_name, None)
1344
+ if state[asr_name] is None:
1345
+ raise SonusAIError(f"Unrecognized ASR name: '{asr_name}'")
1346
+ return state[asr_name]
1347
+
1348
+ return get
1349
+
1350
+ asr_config = create_asr_config()
1351
+
1352
+ def create_target_asr() -> Callable[[str], str]:
1353
+ state: dict[str, str] = {}
1354
+
1355
+ def get(asr_name) -> str:
1356
+ nonlocal state
1357
+ if asr_name not in state:
1358
+ state[asr_name] = calc_asr(target_audio(), **asr_config(asr_name)).text
1359
+ return state[asr_name]
1360
+
1361
+ return get
1362
+
1363
+ target_asr = create_target_asr()
1364
+
1365
+ def create_mixture_asr() -> Callable[[str], str]:
1366
+ state: dict[str, str] = {}
1367
+
1368
+ def get(asr_name) -> str:
1369
+ nonlocal state
1370
+ if asr_name not in state:
1371
+ state[asr_name] = calc_asr(mixture_audio(), **asr_config(asr_name)).text
1372
+ return state[asr_name]
1373
+
1374
+ return get
1375
+
1376
+ mixture_asr = create_mixture_asr()
1377
+
1378
+ def get_asr_name(m: str) -> str:
1379
+ parts = m.split('.')
1380
+ if len(parts) != 2:
1381
+ raise SonusAIError(
1382
+ f"Unrecognized format: '{m}'; must be of the form: '<metric>.<name>'")
1383
+ asr_name = parts[1]
1384
+ return asr_name
1385
+
1386
+ def calc(m: str) -> float | int | str | Segsnr:
1332
1387
  if m == 'mxsnr':
1333
1388
  return self.mixture(m_id).snr
1334
1389
 
@@ -1340,31 +1395,31 @@ class MixtureDatabase:
1340
1395
 
1341
1396
  # Otherwise, generate data as needed
1342
1397
  if m.startswith('mxwer'):
1343
- parts = m.split('.')
1344
- if len(parts) != 2:
1345
- raise SonusAIError(
1346
- f"Unrecognized 'mxwer' format: '{m}'; must be of the form: 'mxwer.<name>'")
1347
- asr_name = parts[1]
1348
- asr_config = self.asr_configs.get(asr_name, None)
1349
- if asr_config is None:
1350
- raise SonusAIError(f"Unrecognized metric: '{m}'")
1398
+ asr_name = get_asr_name(m)
1351
1399
 
1352
1400
  if self.mixture(m_id).snr < -96:
1353
1401
  # noise only, ignore/reset target asr
1354
1402
  return float('nan')
1355
1403
 
1356
- # ignore mixup
1357
- target_asr = self.mixture_speech_metadata(m_id, 'text')[0]
1358
- if target_asr is None:
1359
- target_asr = calc_asr(target_audio(), **asr_config).text
1404
+ if target_asr(asr_name):
1405
+ return calc_wer(mixture_asr(asr_name), target_asr(asr_name)).wer * 100
1360
1406
 
1361
- if target_asr:
1362
- mixture_asr = calc_asr(mixture_audio(), **asr_config).text
1363
- return calc_wer(mixture_asr, target_asr).wer * 100
1407
+ # TODO: should this be NaN like above?
1408
+ return float(0)
1409
+
1410
+ if m.startswith('basewer'):
1411
+ asr_name = get_asr_name(m)
1412
+
1413
+ text = self.mixture_speech_metadata(m_id, 'text')[0]
1414
+ if text is not None:
1415
+ return calc_wer(target_asr(asr_name), text).wer * 100
1364
1416
 
1365
1417
  # TODO: should this be NaN like above?
1366
1418
  return float(0)
1367
1419
 
1420
+ if m.startswith('mxasr'):
1421
+ return mixture_asr(get_asr_name(m))
1422
+
1368
1423
  if m == 'mxssnr_avg':
1369
1424
  return calc_segsnr_f(segsnr_f()).avg
1370
1425
 
@@ -1454,6 +1509,9 @@ class MixtureDatabase:
1454
1509
  if m == 'tpkc':
1455
1510
  return target_stats().pkc
1456
1511
 
1512
+ if m.startswith('tasr'):
1513
+ return target_asr(get_asr_name(m))
1514
+
1457
1515
  if m == 'ndco':
1458
1516
  return noise_stats().dco
1459
1517
 
@@ -1501,7 +1559,7 @@ class MixtureDatabase:
1501
1559
 
1502
1560
  raise SonusAIError(f"Unrecognized metric: '{m}'")
1503
1561
 
1504
- result: list[float | int | Segsnr] = []
1562
+ result: list[float | int | str | Segsnr] = []
1505
1563
  for metric in metrics:
1506
1564
  result.append(calc(metric))
1507
1565
 
@@ -1,160 +0,0 @@
1
- """sonusai post_spenh_targetf
2
-
3
- usage: post_spenh_targetf [-hv] (-m MODEL) (-w KMODEL) INPUT ...
4
-
5
- options:
6
- -h, --help
7
- -v, --verbose Be verbose.
8
- -m MODEL, --model MODEL Python model file.
9
- -w KMODEL, --weights KMODEL Keras model weights file.
10
-
11
- Run post-processing on speech enhancement targetf prediction data.
12
-
13
- Inputs:
14
- MODEL A SonusAI Python model file with build and/or hypermodel functions.
15
- KMODEL A Keras model weights file (or model file with weights).
16
- INPUT A single H5 file or a glob of H5 files
17
-
18
- Outputs the following to post_spenh_targetf-<TIMESTAMP> directory:
19
- <name>.wav
20
- post_spenh_targetf.log
21
-
22
- """
23
- import signal
24
- from dataclasses import dataclass
25
-
26
-
27
- def signal_handler(_sig, _frame):
28
- import sys
29
-
30
- from sonusai import logger
31
-
32
- logger.info('Canceled due to keyboard interrupt')
33
- sys.exit(1)
34
-
35
-
36
- signal.signal(signal.SIGINT, signal_handler)
37
-
38
-
39
- @dataclass
40
- class MPGlobal:
41
- N: int = None
42
- R: int = None
43
- bin_start: int = None
44
- bin_end: int = None
45
- ttype: str = None
46
- output_dir: str = None
47
-
48
-
49
- MP_GLOBAL = MPGlobal()
50
-
51
-
52
- def main() -> None:
53
- from docopt import docopt
54
-
55
- import sonusai
56
- from sonusai.utils import trim_docstring
57
-
58
- args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
59
-
60
- verbose = args['--verbose']
61
- model_name = args['--model']
62
- weights_name = args['--weights']
63
- input_name = args['INPUT']
64
-
65
- import time
66
- from os import makedirs
67
- from os.path import isfile
68
- from os.path import join
69
- from os.path import splitext
70
-
71
- from pyaaware import FeatureGenerator
72
- from tqdm import tqdm
73
-
74
- from sonusai import create_file_handler
75
- from sonusai import initial_log_messages
76
- from sonusai import logger
77
- from sonusai import update_console_handler
78
- from sonusai.utils import create_ts_name
79
- from sonusai.utils import import_and_check_keras_model
80
- from sonusai.utils import pp_tqdm_imap
81
- from sonusai.utils import seconds_to_hms
82
-
83
- start_time = time.monotonic()
84
-
85
- output_dir = create_ts_name('post_spenh_targetf')
86
- makedirs(output_dir, exist_ok=True)
87
-
88
- # Setup logging file
89
- create_file_handler(join(output_dir, 'post_spenh_targetf.log'))
90
- update_console_handler(verbose)
91
- initial_log_messages('post_spenh_targetf')
92
-
93
- hypermodel = import_and_check_keras_model(model_name=model_name, weights_name=weights_name)
94
-
95
- fg = FeatureGenerator(feature_mode=hypermodel.feature,
96
- num_classes=hypermodel.num_classes,
97
- truth_mutex=hypermodel.truth_mutex)
98
-
99
- MP_GLOBAL.N = fg.itransform_N
100
- MP_GLOBAL.R = fg.itransform_R
101
- MP_GLOBAL.bin_start = fg.bin_start
102
- MP_GLOBAL.bin_end = fg.bin_end
103
- MP_GLOBAL.ttype = fg.itransform_ttype
104
- MP_GLOBAL.output_dir = output_dir
105
-
106
- if not all(isfile(file) and splitext(file)[1] == '.h5' for file in input_name):
107
- logger.exception(f'Do not know how to process input from {input_name}')
108
- raise SystemExit(1)
109
-
110
- logger.info('')
111
- logger.info(f'Found {len(input_name):,} files to process')
112
-
113
- progress = tqdm(total=len(input_name))
114
- pp_tqdm_imap(_process, input_name, progress=progress)
115
- progress.close()
116
-
117
- logger.info(f'Wrote {len(input_name)} mixtures to {output_dir}')
118
- logger.info('')
119
-
120
- end_time = time.monotonic()
121
- logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
122
- logger.info('')
123
-
124
-
125
- def _process(file: str) -> None:
126
- """Run extraction on predict data to generate estimation audio
127
- """
128
- from os.path import basename
129
- from os.path import join
130
- from os.path import splitext
131
-
132
- import h5py
133
- import numpy as np
134
- from sonusai import InverseTransform
135
-
136
- from sonusai import SonusAIError
137
- from sonusai.mixture import get_audio_from_transform
138
- from sonusai.utils import float_to_int16
139
- from sonusai.utils import unstack_complex
140
- from sonusai.utils import write_audio
141
-
142
- try:
143
- with h5py.File(file, 'r') as f:
144
- predict = unstack_complex(np.array(f['predict']))
145
- except Exception as e:
146
- raise SonusAIError(f'Error reading {file}: {e}')
147
-
148
- output_name = join(MP_GLOBAL.output_dir, splitext(basename(file))[0] + '.wav')
149
- audio, _ = get_audio_from_transform(data=predict,
150
- transform=InverseTransform(N=MP_GLOBAL.N,
151
- R=MP_GLOBAL.R,
152
- bin_start=MP_GLOBAL.bin_start,
153
- bin_end=MP_GLOBAL.bin_end,
154
- ttype=MP_GLOBAL.ttype,
155
- gain=np.float32(1)))
156
- write_audio(name=output_name, audio=float_to_int16(audio))
157
-
158
-
159
- if __name__ == '__main__':
160
- main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes