sonusai 0.18.8__py3-none-any.whl → 0.19.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. sonusai/__init__.py +20 -29
  2. sonusai/aawscd_probwrite.py +18 -18
  3. sonusai/audiofe.py +93 -80
  4. sonusai/calc_metric_spenh.py +395 -321
  5. sonusai/data/genmixdb.yml +5 -11
  6. sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
  7. sonusai/{plot.py → deprecated/plot.py} +177 -131
  8. sonusai/{tplot.py → deprecated/tplot.py} +124 -102
  9. sonusai/doc/__init__.py +1 -1
  10. sonusai/doc/doc.py +112 -177
  11. sonusai/doc.py +10 -10
  12. sonusai/genft.py +93 -77
  13. sonusai/genmetrics.py +59 -46
  14. sonusai/genmix.py +116 -104
  15. sonusai/genmixdb.py +194 -153
  16. sonusai/lsdb.py +56 -66
  17. sonusai/main.py +23 -20
  18. sonusai/metrics/__init__.py +2 -0
  19. sonusai/metrics/calc_audio_stats.py +29 -24
  20. sonusai/metrics/calc_class_weights.py +7 -7
  21. sonusai/metrics/calc_optimal_thresholds.py +5 -7
  22. sonusai/metrics/calc_pcm.py +3 -3
  23. sonusai/metrics/calc_pesq.py +10 -7
  24. sonusai/metrics/calc_phase_distance.py +3 -3
  25. sonusai/metrics/calc_sa_sdr.py +10 -8
  26. sonusai/metrics/calc_segsnr_f.py +15 -17
  27. sonusai/metrics/calc_speech.py +105 -47
  28. sonusai/metrics/calc_wer.py +35 -32
  29. sonusai/metrics/calc_wsdr.py +10 -7
  30. sonusai/metrics/class_summary.py +30 -27
  31. sonusai/metrics/confusion_matrix_summary.py +25 -22
  32. sonusai/metrics/one_hot.py +91 -57
  33. sonusai/metrics/snr_summary.py +53 -46
  34. sonusai/mixture/__init__.py +19 -14
  35. sonusai/mixture/audio.py +4 -6
  36. sonusai/mixture/augmentation.py +37 -43
  37. sonusai/mixture/class_count.py +5 -14
  38. sonusai/mixture/config.py +292 -225
  39. sonusai/mixture/constants.py +41 -30
  40. sonusai/mixture/data_io.py +155 -0
  41. sonusai/mixture/datatypes.py +111 -108
  42. sonusai/mixture/db_datatypes.py +54 -70
  43. sonusai/mixture/eq_rule_is_valid.py +6 -9
  44. sonusai/mixture/feature.py +50 -46
  45. sonusai/mixture/generation.py +522 -389
  46. sonusai/mixture/helpers.py +217 -272
  47. sonusai/mixture/log_duration_and_sizes.py +16 -13
  48. sonusai/mixture/mixdb.py +677 -473
  49. sonusai/mixture/soundfile_audio.py +12 -17
  50. sonusai/mixture/sox_audio.py +91 -112
  51. sonusai/mixture/sox_augmentation.py +8 -9
  52. sonusai/mixture/spectral_mask.py +4 -6
  53. sonusai/mixture/target_class_balancing.py +41 -36
  54. sonusai/mixture/targets.py +69 -67
  55. sonusai/mixture/tokenized_shell_vars.py +23 -23
  56. sonusai/mixture/torchaudio_audio.py +14 -15
  57. sonusai/mixture/torchaudio_augmentation.py +23 -27
  58. sonusai/mixture/truth.py +48 -26
  59. sonusai/mixture/truth_functions/__init__.py +26 -0
  60. sonusai/mixture/truth_functions/crm.py +56 -38
  61. sonusai/mixture/truth_functions/datatypes.py +37 -0
  62. sonusai/mixture/truth_functions/energy.py +85 -59
  63. sonusai/mixture/truth_functions/file.py +30 -30
  64. sonusai/mixture/truth_functions/phoneme.py +14 -7
  65. sonusai/mixture/truth_functions/sed.py +71 -45
  66. sonusai/mixture/truth_functions/target.py +69 -106
  67. sonusai/mkwav.py +52 -85
  68. sonusai/onnx_predict.py +46 -43
  69. sonusai/queries/__init__.py +3 -1
  70. sonusai/queries/queries.py +100 -59
  71. sonusai/speech/__init__.py +2 -0
  72. sonusai/speech/l2arctic.py +24 -23
  73. sonusai/speech/librispeech.py +16 -17
  74. sonusai/speech/mcgill.py +22 -21
  75. sonusai/speech/textgrid.py +32 -25
  76. sonusai/speech/timit.py +45 -42
  77. sonusai/speech/vctk.py +14 -13
  78. sonusai/speech/voxceleb.py +26 -20
  79. sonusai/summarize_metric_spenh.py +11 -10
  80. sonusai/utils/__init__.py +4 -3
  81. sonusai/utils/asl_p56.py +1 -1
  82. sonusai/utils/asr.py +37 -17
  83. sonusai/utils/asr_functions/__init__.py +2 -0
  84. sonusai/utils/asr_functions/aaware_whisper.py +18 -12
  85. sonusai/utils/audio_devices.py +12 -12
  86. sonusai/utils/braced_glob.py +6 -8
  87. sonusai/utils/calculate_input_shape.py +1 -4
  88. sonusai/utils/compress.py +2 -2
  89. sonusai/utils/convert_string_to_number.py +1 -3
  90. sonusai/utils/create_timestamp.py +1 -1
  91. sonusai/utils/create_ts_name.py +2 -2
  92. sonusai/utils/dataclass_from_dict.py +1 -1
  93. sonusai/utils/docstring.py +6 -6
  94. sonusai/utils/energy_f.py +9 -7
  95. sonusai/utils/engineering_number.py +56 -54
  96. sonusai/utils/get_label_names.py +8 -10
  97. sonusai/utils/human_readable_size.py +2 -2
  98. sonusai/utils/model_utils.py +3 -5
  99. sonusai/utils/numeric_conversion.py +2 -4
  100. sonusai/utils/onnx_utils.py +43 -32
  101. sonusai/utils/parallel.py +40 -27
  102. sonusai/utils/print_mixture_details.py +25 -22
  103. sonusai/utils/ranges.py +12 -12
  104. sonusai/utils/read_predict_data.py +11 -9
  105. sonusai/utils/reshape.py +19 -26
  106. sonusai/utils/seconds_to_hms.py +1 -1
  107. sonusai/utils/stacked_complex.py +8 -16
  108. sonusai/utils/stratified_shuffle_split.py +29 -27
  109. sonusai/utils/write_audio.py +2 -2
  110. sonusai/utils/yes_or_no.py +3 -3
  111. sonusai/vars.py +14 -14
  112. {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/METADATA +20 -21
  113. sonusai-0.19.5.dist-info/RECORD +125 -0
  114. {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/WHEEL +1 -1
  115. sonusai/mixture/truth_functions/data.py +0 -58
  116. sonusai/utils/read_mixture_data.py +0 -14
  117. sonusai-0.18.8.dist-info/RECORD +0 -125
  118. {sonusai-0.18.8.dist-info → sonusai-0.19.5.dist-info}/entry_points.txt +0 -0
sonusai/mixture/db_datatypes.py
@@ -1,72 +1,56 @@
  from collections import namedtuple
 
- TruthSettingRecord = namedtuple('TruthSettingRecord', [
-     'id',
-     'setting'])
-
- TargetFileRecord = namedtuple('TargetFileRecord', [
-     'id',
-     'name',
-     'samples',
-     'level_type',
-     'speaker_id'])
-
- NoiseFileRecord = namedtuple('NoiseFileRecord', [
-     'id',
-     'name',
-     'samples'])
-
- TopRecord = namedtuple('TopRecord', [
-     'id',
-     'version',
-     'class_balancing',
-     'feature',
-     'noise_mix_mode',
-     'num_classes',
-     'seed',
-     'truth_mutex',
-     'truth_reduction_function',
-     'mixid_width',
-     'speaker_metadata_tiers',
-     'textgrid_metadata_tiers'])
-
- ClassLabelRecord = namedtuple('ClassLabelRecord', [
-     'id',
-     'label'])
-
- ClassWeightsThresholdRecord = namedtuple('ClassWeightsThresholdRecord', [
-     'id',
-     'threshold'])
-
- ImpulseResponseFileRecord = namedtuple('ImpulseResponseFileRecord', [
-     'id',
-     'file'])
-
- SpectralMaskRecord = namedtuple('SpectralMaskRecord', [
-     'id',
-     'f_max_width',
-     'f_num',
-     't_max_width',
-     't_num',
-     't_max_percent'])
-
- TargetRecord = namedtuple('TargetRecord', [
-     'id',
-     'file_id',
-     'augmentation',
-     'gain'])
-
- MixtureRecord = namedtuple('MixtureRecord', [
-     'id',
-     'name',
-     'noise_file_id',
-     'noise_augmentation',
-     'noise_offset',
-     'noise_snr_gain',
-     'random_snr',
-     'snr',
-     'samples',
-     'spectral_mask_id',
-     'spectral_mask_seed',
-     'target_snr_gain'
- ])
+ TruthConfigRecord = namedtuple("TruthConfigRecord", ["id", "name", "function", "stride_reduction", "config"])
+
+ TruthParametersRecord = namedtuple("TruthParametersRecord", ["id", "name", "parameters"])
+
+ TargetFileRecord = namedtuple("TargetFileRecord", ["id", "name", "samples", "class_indices", "level_type", "speaker_id"])
+
+ NoiseFileRecord = namedtuple("NoiseFileRecord", ["id", "name", "samples"])
+
+ TopRecord = namedtuple(
+     "TopRecord",
+     [
+         "id",
+         "version",
+         "class_balancing",
+         "feature",
+         "noise_mix_mode",
+         "num_classes",
+         "seed",
+         "mixid_width",
+         "speaker_metadata_tiers",
+         "textgrid_metadata_tiers",
+     ],
+ )
+
+ ClassLabelRecord = namedtuple("ClassLabelRecord", ["id", "label"])
+
+ ClassWeightsThresholdRecord = namedtuple("ClassWeightsThresholdRecord", ["id", "threshold"])
+
+ ImpulseResponseFileRecord = namedtuple("ImpulseResponseFileRecord", ["id", "file"])
+
+ SpectralMaskRecord = namedtuple(
+     "SpectralMaskRecord",
+     ["id", "f_max_width", "f_num", "t_max_width", "t_num", "t_max_percent"],
+ )
+
+ TargetRecord = namedtuple("TargetRecord", ["id", "file_id", "augmentation", "gain"])
+
+ MixtureRecord = namedtuple(
+     "MixtureRecord",
+     [
+         "id",
+         "name",
+         "noise_file_id",
+         "noise_augmentation",
+         "noise_offset",
+         "noise_snr_gain",
+         "random_snr",
+         "snr",
+         "samples",
+         "spectral_mask_id",
+         "spectral_mask_seed",
+         "target_snr_gain",
+     ],
+ )
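
Note: the 0.19.5 records above are plain namedtuples, so they are constructed with keyword arguments matching the listed fields. A minimal sketch (the field values and the truth function name are made up for illustration, not taken from this diff):

    # Illustrative only: build the new 0.19.5 records with hypothetical values.
    from sonusai.mixture.db_datatypes import TruthConfigRecord, TruthParametersRecord

    tc = TruthConfigRecord(id=1, name="sed", function="energy_t", stride_reduction="none", config="{}")
    tp = TruthParametersRecord(id=1, name="sed", parameters=3)
    print(tc.function, tp.parameters)  # namedtuple fields are accessed by name
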
sonusai/mixture/eq_rule_is_valid.py
@@ -8,7 +8,7 @@ def eq_rule_is_valid(rule: Any) -> bool:
      """
 
      # Must be a list or string equal to 'none'
-     if isinstance(rule, str) and rule == 'none':
+     if isinstance(rule, str) and rule == "none":
          return True
 
      if not isinstance(rule, list):
@@ -27,22 +27,19 @@ def eq_rule_is_valid(rule: Any) -> bool:
          if not all(isinstance(el, float | int | str) for el in r):
              return False
 
-         if isinstance(r, str) and r == 'none':
+         if isinstance(r, str) and r == "none":
              continue
 
          for el in r:
              # If a string, item must start with 'rand'
-             if isinstance(el, str) and not el.startswith('rand'):
+             if isinstance(el, str) and not el.startswith("rand"):
                  return False
 
      return True
 
 
  def _check_for_none(rule: Any) -> bool:
-     """Check if EQ rule is 'none'
-     """
-     if isinstance(rule, str) and rule == 'none':
-         return True
-     if isinstance(rule, list) and len(rule) == 3:
+     """Check if EQ rule is 'none'"""
+     if isinstance(rule, str) and rule == "none":
          return True
-     return False
+     return bool(isinstance(rule, list) and len(rule) == 3)
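
Note: the _check_for_none refactor only collapses the trailing if/return-False pair into a single boolean expression; behavior is unchanged. A small standalone sketch, with hypothetical rules chosen to exercise the branches shown above:

    # Equivalence check for the _check_for_none refactor (illustrative only).
    from typing import Any

    def check_old(rule: Any) -> bool:
        # 0.18.8 shape: explicit if / return False
        if isinstance(rule, str) and rule == "none":
            return True
        if isinstance(rule, list) and len(rule) == 3:
            return True
        return False

    def check_new(rule: Any) -> bool:
        # 0.19.5 shape: single boolean expression
        if isinstance(rule, str) and rule == "none":
            return True
        return bool(isinstance(rule, list) and len(rule) == 3)

    # Hypothetical EQ rules; both versions agree on every case.
    for rule in ("none", [100, 2, -6], ["rand(50, 200)", 2, -6], "other", [1, 2], 42):
        assert check_old(rule) == check_new(rule)
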
sonusai/mixture/feature.py
@@ -1,46 +1,38 @@
- from typing import Optional
-
  from sonusai.mixture.datatypes import AudioT
  from sonusai.mixture.datatypes import Feature
 
 
- def get_feature_from_audio(audio: AudioT,
-                            feature_mode: str,
-                            num_classes: Optional[int] = 1,
-                            truth_mutex: Optional[bool] = False) -> Feature:
+ def get_feature_from_audio(
+     audio: AudioT,
+     feature_mode: str,
+ ) -> Feature:
      """Apply forward transform and generate feature data from audio data
 
      :param audio: Time domain audio data [samples]
      :param feature_mode: Feature mode
-     :param num_classes: Number of classes
-     :param truth_mutex: Whether to calculate 'other' label
      :return: Feature data [frames, strides, feature_parameters]
      """
      import numpy as np
      from pyaaware import FeatureGenerator
 
-     from .augmentation import pad_audio_to_frame
      from .datatypes import TransformConfig
      from .helpers import forward_transform
 
-     fg = FeatureGenerator(feature_mode=feature_mode,
-                           num_classes=num_classes,
-                           truth_mutex=truth_mutex)
-
-     feature_step_samples = fg.ftransform_R * fg.decimation * fg.step
-     audio = pad_audio_to_frame(audio, feature_step_samples)
-
-     audio_f = forward_transform(audio=audio,
-                                 config=TransformConfig(N=fg.ftransform_N,
-                                                        R=fg.ftransform_R,
-                                                        bin_start=fg.bin_start,
-                                                        bin_end=fg.bin_end,
-                                                        ttype=fg.ftransform_ttype))
-
-     samples = len(audio)
-     transform_frames = samples // fg.ftransform_R
-     feature_frames = samples // feature_step_samples
-
+     fg = FeatureGenerator(feature_mode=feature_mode)
+
+     audio_f = forward_transform(
+         audio=audio,
+         config=TransformConfig(
+             length=fg.ftransform_length,
+             overlap=fg.ftransform_overlap,
+             bin_start=fg.bin_start,
+             bin_end=fg.bin_end,
+             ttype=fg.ftransform_ttype,
+         ),
+     )
+
+     transform_frames = audio_f.shape[0]
+     feature_frames = transform_frames // (fg.decimation * fg.step)
      feature = np.empty((feature_frames, fg.stride, fg.feature_parameters), dtype=np.float32)
 
      feature_frame = 0
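
Note: get_feature_from_audio no longer takes num_classes or truth_mutex in 0.19.5, so existing callers simply drop those arguments. A minimal usage sketch against the new signature (the import path follows this diff's file layout; the sample rate, dtype, and feature-mode string are placeholders, not values from this diff):

    import numpy as np

    from sonusai.mixture.feature import get_feature_from_audio

    # Hypothetical input: one second of silence; real audio and sample rate depend on your data.
    audio = np.zeros(16000, dtype=np.float32)

    # 0.18.8: get_feature_from_audio(audio, feature_mode, num_classes=..., truth_mutex=...)
    # 0.19.5: only the audio and feature mode are passed.
    feature = get_feature_from_audio(audio=audio, feature_mode="example_mode")  # placeholder mode name
    print(feature.shape)  # [frames, stride, feature_parameters]
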
@@ -54,37 +46,49 @@ def get_feature_from_audio(audio: AudioT,
      return feature
 
 
- def get_audio_from_feature(feature: Feature,
-                            feature_mode: str,
-                            num_classes: Optional[int] = 1,
-                            truth_mutex: Optional[bool] = False) -> AudioT:
+ def get_audio_from_feature(
+     feature: Feature,
+     feature_mode: str,
+     num_classes: int | None = 1,
+     truth_mutex: bool | None = False,
+ ) -> AudioT:
      """Apply inverse transform to feature data to generate audio data
 
-     :param feature: Feature data [frames, strides, feature_parameters]
+     :param feature: Feature data [frames, stride=1, feature_parameters]
      :param feature_mode: Feature mode
      :param num_classes: Number of classes
      :param truth_mutex: Whether to calculate 'other' label
      :return: Audio data [samples]
      """
      import numpy as np
-
      from pyaaware import FeatureGenerator
 
+     from sonusai.utils.compress import power_uncompress
+     from sonusai.utils.stacked_complex import unstack_complex
+
      from .datatypes import TransformConfig
      from .helpers import inverse_transform
-     from sonusai.utils.stacked_complex import unstack_complex
-     from sonusai.utils.compress import power_uncompress
 
-     fg = FeatureGenerator(feature_mode=feature_mode,
-                           num_classes=num_classes,
-                           truth_mutex=truth_mutex)
+     if feature.ndim != 3:
+         raise ValueError("feature must have 3 dimensions: [frames, stride=1, feature_parameters]")
+
+     if feature.shape[1] != 1:
+         raise ValueError("Strided feature data is not supported for audio extraction; stride must be 1.")
+
+     fg = FeatureGenerator(feature_mode=feature_mode, num_classes=num_classes, truth_mutex=truth_mutex)
 
-     feature_complex = unstack_complex(feature)
-     if feature_mode[0:1] == 'h':
+     feature_complex = unstack_complex(feature.squeeze())
+     if feature_mode[0:1] == "h":
          feature_complex = power_uncompress(feature_complex)
-     return np.squeeze(inverse_transform(transform=feature_complex,
-                                         config=TransformConfig(N=fg.itransform_N,
-                                                                R=fg.itransform_R,
-                                                                bin_start=fg.bin_start,
-                                                                bin_end=fg.bin_end,
-                                                                ttype=fg.itransform_ttype)))
+     return np.squeeze(
+         inverse_transform(
+             transform=feature_complex,
+             config=TransformConfig(
+                 length=fg.itransform_length,
+                 overlap=fg.itransform_overlap,
+                 bin_start=fg.bin_start,
+                 bin_end=fg.bin_end,
+                 ttype=fg.itransform_ttype,
+             ),
+         )
+     )
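
Note: with the new guard clauses, get_audio_from_feature rejects strided feature data up front, and the shape checks run before the feature mode is used. A hedged sketch of that behavior (the array shape and the mode string are placeholders, not values from this diff):

    import numpy as np

    from sonusai.mixture.feature import get_audio_from_feature

    # Hypothetical strided feature: [frames=10, stride=4, feature_parameters=64].
    strided = np.zeros((10, 4, 64), dtype=np.float32)

    try:
        # In 0.19.5 the shape checks fire first, so the placeholder mode string is never consulted.
        get_audio_from_feature(feature=strided, feature_mode="placeholder")
    except ValueError as err:
        print(err)  # "Strided feature data is not supported for audio extraction; stride must be 1."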