sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. sonusai/__init__.py +20 -29
  2. sonusai/aawscd_probwrite.py +18 -18
  3. sonusai/audiofe.py +93 -80
  4. sonusai/calc_metric_spenh.py +395 -321
  5. sonusai/data/genmixdb.yml +5 -11
  6. sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
  7. sonusai/{plot.py → deprecated/plot.py} +177 -131
  8. sonusai/{tplot.py → deprecated/tplot.py} +124 -102
  9. sonusai/doc/__init__.py +1 -1
  10. sonusai/doc/doc.py +112 -177
  11. sonusai/doc.py +10 -10
  12. sonusai/genft.py +81 -91
  13. sonusai/genmetrics.py +51 -61
  14. sonusai/genmix.py +105 -115
  15. sonusai/genmixdb.py +201 -174
  16. sonusai/lsdb.py +56 -66
  17. sonusai/main.py +23 -20
  18. sonusai/metrics/__init__.py +2 -0
  19. sonusai/metrics/calc_audio_stats.py +29 -24
  20. sonusai/metrics/calc_class_weights.py +7 -7
  21. sonusai/metrics/calc_optimal_thresholds.py +5 -7
  22. sonusai/metrics/calc_pcm.py +3 -3
  23. sonusai/metrics/calc_pesq.py +10 -7
  24. sonusai/metrics/calc_phase_distance.py +3 -3
  25. sonusai/metrics/calc_sa_sdr.py +10 -8
  26. sonusai/metrics/calc_segsnr_f.py +16 -18
  27. sonusai/metrics/calc_speech.py +105 -47
  28. sonusai/metrics/calc_wer.py +35 -32
  29. sonusai/metrics/calc_wsdr.py +10 -7
  30. sonusai/metrics/class_summary.py +30 -27
  31. sonusai/metrics/confusion_matrix_summary.py +25 -22
  32. sonusai/metrics/one_hot.py +91 -57
  33. sonusai/metrics/snr_summary.py +53 -46
  34. sonusai/mixture/__init__.py +20 -14
  35. sonusai/mixture/audio.py +4 -6
  36. sonusai/mixture/augmentation.py +37 -43
  37. sonusai/mixture/class_count.py +5 -14
  38. sonusai/mixture/config.py +292 -225
  39. sonusai/mixture/constants.py +41 -30
  40. sonusai/mixture/data_io.py +155 -0
  41. sonusai/mixture/datatypes.py +111 -108
  42. sonusai/mixture/db_datatypes.py +54 -70
  43. sonusai/mixture/eq_rule_is_valid.py +6 -9
  44. sonusai/mixture/feature.py +40 -38
  45. sonusai/mixture/generation.py +522 -389
  46. sonusai/mixture/helpers.py +217 -272
  47. sonusai/mixture/log_duration_and_sizes.py +16 -13
  48. sonusai/mixture/mixdb.py +669 -477
  49. sonusai/mixture/soundfile_audio.py +12 -17
  50. sonusai/mixture/sox_audio.py +91 -112
  51. sonusai/mixture/sox_augmentation.py +8 -9
  52. sonusai/mixture/spectral_mask.py +4 -6
  53. sonusai/mixture/target_class_balancing.py +41 -36
  54. sonusai/mixture/targets.py +69 -67
  55. sonusai/mixture/tokenized_shell_vars.py +23 -23
  56. sonusai/mixture/torchaudio_audio.py +14 -15
  57. sonusai/mixture/torchaudio_augmentation.py +23 -27
  58. sonusai/mixture/truth.py +48 -26
  59. sonusai/mixture/truth_functions/__init__.py +26 -0
  60. sonusai/mixture/truth_functions/crm.py +56 -38
  61. sonusai/mixture/truth_functions/datatypes.py +37 -0
  62. sonusai/mixture/truth_functions/energy.py +85 -59
  63. sonusai/mixture/truth_functions/file.py +30 -30
  64. sonusai/mixture/truth_functions/phoneme.py +14 -7
  65. sonusai/mixture/truth_functions/sed.py +71 -45
  66. sonusai/mixture/truth_functions/target.py +69 -106
  67. sonusai/mkwav.py +58 -101
  68. sonusai/onnx_predict.py +46 -43
  69. sonusai/queries/__init__.py +3 -1
  70. sonusai/queries/queries.py +100 -59
  71. sonusai/speech/__init__.py +2 -0
  72. sonusai/speech/l2arctic.py +24 -23
  73. sonusai/speech/librispeech.py +16 -17
  74. sonusai/speech/mcgill.py +22 -21
  75. sonusai/speech/textgrid.py +32 -25
  76. sonusai/speech/timit.py +45 -42
  77. sonusai/speech/vctk.py +14 -13
  78. sonusai/speech/voxceleb.py +26 -20
  79. sonusai/summarize_metric_spenh.py +11 -10
  80. sonusai/utils/__init__.py +4 -3
  81. sonusai/utils/asl_p56.py +1 -1
  82. sonusai/utils/asr.py +37 -17
  83. sonusai/utils/asr_functions/__init__.py +2 -0
  84. sonusai/utils/asr_functions/aaware_whisper.py +18 -12
  85. sonusai/utils/audio_devices.py +12 -12
  86. sonusai/utils/braced_glob.py +6 -8
  87. sonusai/utils/calculate_input_shape.py +1 -4
  88. sonusai/utils/compress.py +2 -2
  89. sonusai/utils/convert_string_to_number.py +1 -3
  90. sonusai/utils/create_timestamp.py +1 -1
  91. sonusai/utils/create_ts_name.py +2 -2
  92. sonusai/utils/dataclass_from_dict.py +1 -1
  93. sonusai/utils/docstring.py +6 -6
  94. sonusai/utils/energy_f.py +9 -7
  95. sonusai/utils/engineering_number.py +56 -54
  96. sonusai/utils/get_label_names.py +8 -10
  97. sonusai/utils/human_readable_size.py +2 -2
  98. sonusai/utils/model_utils.py +3 -5
  99. sonusai/utils/numeric_conversion.py +2 -4
  100. sonusai/utils/onnx_utils.py +43 -32
  101. sonusai/utils/parallel.py +41 -30
  102. sonusai/utils/print_mixture_details.py +25 -22
  103. sonusai/utils/ranges.py +12 -12
  104. sonusai/utils/read_predict_data.py +11 -9
  105. sonusai/utils/reshape.py +19 -26
  106. sonusai/utils/seconds_to_hms.py +1 -1
  107. sonusai/utils/stacked_complex.py +8 -16
  108. sonusai/utils/stratified_shuffle_split.py +29 -27
  109. sonusai/utils/write_audio.py +2 -2
  110. sonusai/utils/yes_or_no.py +3 -3
  111. sonusai/vars.py +14 -14
  112. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
  113. sonusai-0.19.6.dist-info/RECORD +125 -0
  114. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
  115. sonusai/mixture/truth_functions/data.py +0 -58
  116. sonusai/utils/read_mixture_data.py +0 -14
  117. sonusai-0.18.9.dist-info/RECORD +0 -125
  118. {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
@@ -1,72 +1,56 @@
1
1
  from collections import namedtuple
2
2
 
3
- TruthSettingRecord = namedtuple('TruthSettingRecord', [
4
- 'id',
5
- 'setting'])
6
-
7
- TargetFileRecord = namedtuple('TargetFileRecord', [
8
- 'id',
9
- 'name',
10
- 'samples',
11
- 'level_type',
12
- 'speaker_id'])
13
-
14
- NoiseFileRecord = namedtuple('NoiseFileRecord', [
15
- 'id',
16
- 'name',
17
- 'samples'])
18
-
19
- TopRecord = namedtuple('TopRecord', [
20
- 'id',
21
- 'version',
22
- 'class_balancing',
23
- 'feature',
24
- 'noise_mix_mode',
25
- 'num_classes',
26
- 'seed',
27
- 'truth_mutex',
28
- 'truth_reduction_function',
29
- 'mixid_width',
30
- 'speaker_metadata_tiers',
31
- 'textgrid_metadata_tiers'])
32
-
33
- ClassLabelRecord = namedtuple('ClassLabelRecord', [
34
- 'id',
35
- 'label'])
36
-
37
- ClassWeightsThresholdRecord = namedtuple('ClassWeightsThresholdRecord', [
38
- 'id',
39
- 'threshold'])
40
-
41
- ImpulseResponseFileRecord = namedtuple('ImpulseResponseFileRecord', [
42
- 'id',
43
- 'file'])
44
-
45
- SpectralMaskRecord = namedtuple('SpectralMaskRecord', [
46
- 'id',
47
- 'f_max_width',
48
- 'f_num',
49
- 't_max_width',
50
- 't_num',
51
- 't_max_percent'])
52
-
53
- TargetRecord = namedtuple('TargetRecord', [
54
- 'id',
55
- 'file_id',
56
- 'augmentation',
57
- 'gain'])
58
-
59
- MixtureRecord = namedtuple('MixtureRecord', [
60
- 'id',
61
- 'name',
62
- 'noise_file_id',
63
- 'noise_augmentation',
64
- 'noise_offset',
65
- 'noise_snr_gain',
66
- 'random_snr',
67
- 'snr',
68
- 'samples',
69
- 'spectral_mask_id',
70
- 'spectral_mask_seed',
71
- 'target_snr_gain'
72
- ])
3
+ TruthConfigRecord = namedtuple("TruthConfigRecord", ["id", "name", "function", "stride_reduction", "config"])
4
+
5
+ TruthParametersRecord = namedtuple("TruthParametersRecord", ["id", "name", "parameters"])
6
+
7
+ TargetFileRecord = namedtuple("TargetFileRecord", ["id", "name", "samples", "class_indices", "level_type", "speaker_id"])
8
+
9
+ NoiseFileRecord = namedtuple("NoiseFileRecord", ["id", "name", "samples"])
10
+
11
+ TopRecord = namedtuple(
12
+ "TopRecord",
13
+ [
14
+ "id",
15
+ "version",
16
+ "class_balancing",
17
+ "feature",
18
+ "noise_mix_mode",
19
+ "num_classes",
20
+ "seed",
21
+ "mixid_width",
22
+ "speaker_metadata_tiers",
23
+ "textgrid_metadata_tiers",
24
+ ],
25
+ )
26
+
27
+ ClassLabelRecord = namedtuple("ClassLabelRecord", ["id", "label"])
28
+
29
+ ClassWeightsThresholdRecord = namedtuple("ClassWeightsThresholdRecord", ["id", "threshold"])
30
+
31
+ ImpulseResponseFileRecord = namedtuple("ImpulseResponseFileRecord", ["id", "file"])
32
+
33
+ SpectralMaskRecord = namedtuple(
34
+ "SpectralMaskRecord",
35
+ ["id", "f_max_width", "f_num", "t_max_width", "t_num", "t_max_percent"],
36
+ )
37
+
38
+ TargetRecord = namedtuple("TargetRecord", ["id", "file_id", "augmentation", "gain"])
39
+
40
+ MixtureRecord = namedtuple(
41
+ "MixtureRecord",
42
+ [
43
+ "id",
44
+ "name",
45
+ "noise_file_id",
46
+ "noise_augmentation",
47
+ "noise_offset",
48
+ "noise_snr_gain",
49
+ "random_snr",
50
+ "snr",
51
+ "samples",
52
+ "spectral_mask_id",
53
+ "spectral_mask_seed",
54
+ "target_snr_gain",
55
+ ],
56
+ )
@@ -8,7 +8,7 @@ def eq_rule_is_valid(rule: Any) -> bool:
8
8
  """
9
9
 
10
10
  # Must be a list or string equal to 'none'
11
- if isinstance(rule, str) and rule == 'none':
11
+ if isinstance(rule, str) and rule == "none":
12
12
  return True
13
13
 
14
14
  if not isinstance(rule, list):
@@ -27,22 +27,19 @@ def eq_rule_is_valid(rule: Any) -> bool:
27
27
  if not all(isinstance(el, float | int | str) for el in r):
28
28
  return False
29
29
 
30
- if isinstance(r, str) and r == 'none':
30
+ if isinstance(r, str) and r == "none":
31
31
  continue
32
32
 
33
33
  for el in r:
34
34
  # If a string, item must start with 'rand'
35
- if isinstance(el, str) and not el.startswith('rand'):
35
+ if isinstance(el, str) and not el.startswith("rand"):
36
36
  return False
37
37
 
38
38
  return True
39
39
 
40
40
 
41
41
  def _check_for_none(rule: Any) -> bool:
42
- """Check if EQ rule is 'none'
43
- """
44
- if isinstance(rule, str) and rule == 'none':
45
- return True
46
- if isinstance(rule, list) and len(rule) == 3:
42
+ """Check if EQ rule is 'none'"""
43
+ if isinstance(rule, str) and rule == "none":
47
44
  return True
48
- return False
45
+ return bool(isinstance(rule, list) and len(rule) == 3)
@@ -1,38 +1,35 @@
1
- from typing import Optional
2
-
3
1
  from sonusai.mixture.datatypes import AudioT
4
2
  from sonusai.mixture.datatypes import Feature
5
3
 
6
4
 
7
- def get_feature_from_audio(audio: AudioT,
8
- feature_mode: str,
9
- num_classes: Optional[int] = 1,
10
- truth_mutex: Optional[bool] = False) -> Feature:
5
+ def get_feature_from_audio(
6
+ audio: AudioT,
7
+ feature_mode: str,
8
+ ) -> Feature:
11
9
  """Apply forward transform and generate feature data from audio data
12
10
 
13
11
  :param audio: Time domain audio data [samples]
14
12
  :param feature_mode: Feature mode
15
- :param num_classes: Number of classes
16
- :param truth_mutex: Whether to calculate 'other' label
17
13
  :return: Feature data [frames, strides, feature_parameters]
18
14
  """
19
15
  import numpy as np
20
16
  from pyaaware import FeatureGenerator
21
17
 
22
- from .augmentation import pad_audio_to_frame
23
18
  from .datatypes import TransformConfig
24
19
  from .helpers import forward_transform
25
20
 
26
- fg = FeatureGenerator(feature_mode=feature_mode,
27
- num_classes=num_classes,
28
- truth_mutex=truth_mutex)
21
+ fg = FeatureGenerator(feature_mode=feature_mode)
29
22
 
30
- audio_f = forward_transform(audio=audio,
31
- config=TransformConfig(N=fg.ftransform_N,
32
- R=fg.ftransform_R,
33
- bin_start=fg.bin_start,
34
- bin_end=fg.bin_end,
35
- ttype=fg.ftransform_ttype))
23
+ audio_f = forward_transform(
24
+ audio=audio,
25
+ config=TransformConfig(
26
+ length=fg.ftransform_length,
27
+ overlap=fg.ftransform_overlap,
28
+ bin_start=fg.bin_start,
29
+ bin_end=fg.bin_end,
30
+ ttype=fg.ftransform_ttype,
31
+ ),
32
+ )
36
33
 
37
34
  transform_frames = audio_f.shape[0]
38
35
  feature_frames = transform_frames // (fg.decimation * fg.step)
@@ -49,10 +46,12 @@ def get_feature_from_audio(audio: AudioT,
49
46
  return feature
50
47
 
51
48
 
52
- def get_audio_from_feature(feature: Feature,
53
- feature_mode: str,
54
- num_classes: Optional[int] = 1,
55
- truth_mutex: Optional[bool] = False) -> AudioT:
49
+ def get_audio_from_feature(
50
+ feature: Feature,
51
+ feature_mode: str,
52
+ num_classes: int | None = 1,
53
+ truth_mutex: bool | None = False,
54
+ ) -> AudioT:
56
55
  """Apply inverse transform to feature data to generate audio data
57
56
 
58
57
  :param feature: Feature data [frames, stride=1, feature_parameters]
@@ -62,31 +61,34 @@ def get_audio_from_feature(feature: Feature,
62
61
  :return: Audio data [samples]
63
62
  """
64
63
  import numpy as np
65
-
66
64
  from pyaaware import FeatureGenerator
67
65
 
68
- from sonusai import SonusAIError
66
+ from sonusai.utils.compress import power_uncompress
67
+ from sonusai.utils.stacked_complex import unstack_complex
68
+
69
69
  from .datatypes import TransformConfig
70
70
  from .helpers import inverse_transform
71
- from sonusai.utils.stacked_complex import unstack_complex
72
- from sonusai.utils.compress import power_uncompress
73
71
 
74
72
  if feature.ndim != 3:
75
- raise SonusAIError('feature must have 3 dimensions: [frames, stride=1, feature_parameters]')
73
+ raise ValueError("feature must have 3 dimensions: [frames, stride=1, feature_parameters]")
76
74
 
77
75
  if feature.shape[1] != 1:
78
- raise SonusAIError('Strided feature data is not supported for audio extraction; stride must be 1.')
76
+ raise ValueError("Strided feature data is not supported for audio extraction; stride must be 1.")
79
77
 
80
- fg = FeatureGenerator(feature_mode=feature_mode,
81
- num_classes=num_classes,
82
- truth_mutex=truth_mutex)
78
+ fg = FeatureGenerator(feature_mode=feature_mode, num_classes=num_classes, truth_mutex=truth_mutex)
83
79
 
84
80
  feature_complex = unstack_complex(feature.squeeze())
85
- if feature_mode[0:1] == 'h':
81
+ if feature_mode[0:1] == "h":
86
82
  feature_complex = power_uncompress(feature_complex)
87
- return np.squeeze(inverse_transform(transform=feature_complex,
88
- config=TransformConfig(N=fg.itransform_N,
89
- R=fg.itransform_R,
90
- bin_start=fg.bin_start,
91
- bin_end=fg.bin_end,
92
- ttype=fg.itransform_ttype)))
83
+ return np.squeeze(
84
+ inverse_transform(
85
+ transform=feature_complex,
86
+ config=TransformConfig(
87
+ length=fg.itransform_length,
88
+ overlap=fg.itransform_overlap,
89
+ bin_start=fg.bin_start,
90
+ bin_end=fg.bin_end,
91
+ ttype=fg.itransform_ttype,
92
+ ),
93
+ )
94
+ )