sonusai 0.18.9__py3-none-any.whl → 0.19.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +20 -29
- sonusai/aawscd_probwrite.py +18 -18
- sonusai/audiofe.py +93 -80
- sonusai/calc_metric_spenh.py +395 -321
- sonusai/data/genmixdb.yml +5 -11
- sonusai/{gentcst.py → deprecated/gentcst.py} +146 -149
- sonusai/{plot.py → deprecated/plot.py} +177 -131
- sonusai/{tplot.py → deprecated/tplot.py} +124 -102
- sonusai/doc/__init__.py +1 -1
- sonusai/doc/doc.py +112 -177
- sonusai/doc.py +10 -10
- sonusai/genft.py +81 -91
- sonusai/genmetrics.py +51 -61
- sonusai/genmix.py +105 -115
- sonusai/genmixdb.py +201 -174
- sonusai/lsdb.py +56 -66
- sonusai/main.py +23 -20
- sonusai/metrics/__init__.py +2 -0
- sonusai/metrics/calc_audio_stats.py +29 -24
- sonusai/metrics/calc_class_weights.py +7 -7
- sonusai/metrics/calc_optimal_thresholds.py +5 -7
- sonusai/metrics/calc_pcm.py +3 -3
- sonusai/metrics/calc_pesq.py +10 -7
- sonusai/metrics/calc_phase_distance.py +3 -3
- sonusai/metrics/calc_sa_sdr.py +10 -8
- sonusai/metrics/calc_segsnr_f.py +16 -18
- sonusai/metrics/calc_speech.py +105 -47
- sonusai/metrics/calc_wer.py +35 -32
- sonusai/metrics/calc_wsdr.py +10 -7
- sonusai/metrics/class_summary.py +30 -27
- sonusai/metrics/confusion_matrix_summary.py +25 -22
- sonusai/metrics/one_hot.py +91 -57
- sonusai/metrics/snr_summary.py +53 -46
- sonusai/mixture/__init__.py +20 -14
- sonusai/mixture/audio.py +4 -6
- sonusai/mixture/augmentation.py +37 -43
- sonusai/mixture/class_count.py +5 -14
- sonusai/mixture/config.py +292 -225
- sonusai/mixture/constants.py +41 -30
- sonusai/mixture/data_io.py +155 -0
- sonusai/mixture/datatypes.py +111 -108
- sonusai/mixture/db_datatypes.py +54 -70
- sonusai/mixture/eq_rule_is_valid.py +6 -9
- sonusai/mixture/feature.py +40 -38
- sonusai/mixture/generation.py +522 -389
- sonusai/mixture/helpers.py +217 -272
- sonusai/mixture/log_duration_and_sizes.py +16 -13
- sonusai/mixture/mixdb.py +669 -477
- sonusai/mixture/soundfile_audio.py +12 -17
- sonusai/mixture/sox_audio.py +91 -112
- sonusai/mixture/sox_augmentation.py +8 -9
- sonusai/mixture/spectral_mask.py +4 -6
- sonusai/mixture/target_class_balancing.py +41 -36
- sonusai/mixture/targets.py +69 -67
- sonusai/mixture/tokenized_shell_vars.py +23 -23
- sonusai/mixture/torchaudio_audio.py +14 -15
- sonusai/mixture/torchaudio_augmentation.py +23 -27
- sonusai/mixture/truth.py +48 -26
- sonusai/mixture/truth_functions/__init__.py +26 -0
- sonusai/mixture/truth_functions/crm.py +56 -38
- sonusai/mixture/truth_functions/datatypes.py +37 -0
- sonusai/mixture/truth_functions/energy.py +85 -59
- sonusai/mixture/truth_functions/file.py +30 -30
- sonusai/mixture/truth_functions/phoneme.py +14 -7
- sonusai/mixture/truth_functions/sed.py +71 -45
- sonusai/mixture/truth_functions/target.py +69 -106
- sonusai/mkwav.py +58 -101
- sonusai/onnx_predict.py +46 -43
- sonusai/queries/__init__.py +3 -1
- sonusai/queries/queries.py +100 -59
- sonusai/speech/__init__.py +2 -0
- sonusai/speech/l2arctic.py +24 -23
- sonusai/speech/librispeech.py +16 -17
- sonusai/speech/mcgill.py +22 -21
- sonusai/speech/textgrid.py +32 -25
- sonusai/speech/timit.py +45 -42
- sonusai/speech/vctk.py +14 -13
- sonusai/speech/voxceleb.py +26 -20
- sonusai/summarize_metric_spenh.py +11 -10
- sonusai/utils/__init__.py +4 -3
- sonusai/utils/asl_p56.py +1 -1
- sonusai/utils/asr.py +37 -17
- sonusai/utils/asr_functions/__init__.py +2 -0
- sonusai/utils/asr_functions/aaware_whisper.py +18 -12
- sonusai/utils/audio_devices.py +12 -12
- sonusai/utils/braced_glob.py +6 -8
- sonusai/utils/calculate_input_shape.py +1 -4
- sonusai/utils/compress.py +2 -2
- sonusai/utils/convert_string_to_number.py +1 -3
- sonusai/utils/create_timestamp.py +1 -1
- sonusai/utils/create_ts_name.py +2 -2
- sonusai/utils/dataclass_from_dict.py +1 -1
- sonusai/utils/docstring.py +6 -6
- sonusai/utils/energy_f.py +9 -7
- sonusai/utils/engineering_number.py +56 -54
- sonusai/utils/get_label_names.py +8 -10
- sonusai/utils/human_readable_size.py +2 -2
- sonusai/utils/model_utils.py +3 -5
- sonusai/utils/numeric_conversion.py +2 -4
- sonusai/utils/onnx_utils.py +43 -32
- sonusai/utils/parallel.py +41 -30
- sonusai/utils/print_mixture_details.py +25 -22
- sonusai/utils/ranges.py +12 -12
- sonusai/utils/read_predict_data.py +11 -9
- sonusai/utils/reshape.py +19 -26
- sonusai/utils/seconds_to_hms.py +1 -1
- sonusai/utils/stacked_complex.py +8 -16
- sonusai/utils/stratified_shuffle_split.py +29 -27
- sonusai/utils/write_audio.py +2 -2
- sonusai/utils/yes_or_no.py +3 -3
- sonusai/vars.py +14 -14
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/METADATA +20 -21
- sonusai-0.19.6.dist-info/RECORD +125 -0
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/WHEEL +1 -1
- sonusai/mixture/truth_functions/data.py +0 -58
- sonusai/utils/read_mixture_data.py +0 -14
- sonusai-0.18.9.dist-info/RECORD +0 -125
- {sonusai-0.18.9.dist-info → sonusai-0.19.6.dist-info}/entry_points.txt +0 -0
sonusai/doc/doc.py
CHANGED
@@ -3,22 +3,27 @@ from sonusai.mixture import get_default_config
|
|
3
3
|
|
4
4
|
def doc_seed() -> str:
|
5
5
|
default = f"\nDefault value: {get_default_config()['seed']}"
|
6
|
+
# fmt: off
|
6
7
|
return """
|
7
8
|
'seed' is a mixture database configuration parameter that sets the random number
|
8
9
|
generator seed.
|
9
10
|
""" + default
|
11
|
+
# fmt: on
|
10
12
|
|
11
13
|
|
12
14
|
def doc_feature() -> str:
|
13
15
|
default = f"\nDefault value: {get_default_config()['feature']}"
|
16
|
+
# fmt: off
|
14
17
|
return """
|
15
18
|
'feature' is a mixture database configuration parameter that sets the feature
|
16
19
|
to use.
|
17
20
|
""" + default
|
21
|
+
# fmt: on
|
18
22
|
|
19
23
|
|
20
24
|
def doc_target_level_type() -> str:
|
21
25
|
default = f"\nDefault value: {get_default_config()['target_level_type']}"
|
26
|
+
# fmt: off
|
22
27
|
return """
|
23
28
|
'target_level_type' is a mixture database configuration parameter that sets the
|
24
29
|
algorithm to use to determine target energy level for SNR calculations.
|
@@ -27,10 +32,12 @@ Supported values are:
|
|
27
32
|
default mean of squares
|
28
33
|
speech ITU-T P.56 active speech level method B
|
29
34
|
""" + default
|
35
|
+
# fmt: on
|
30
36
|
|
31
37
|
|
32
38
|
def doc_targets() -> str:
|
33
39
|
default = f"\nDefault value: {get_default_config()['targets']}"
|
40
|
+
# fmt: off
|
34
41
|
return """
|
35
42
|
'targets' is a mixture database configuration parameter that sets the list of
|
36
43
|
targets to use.
|
@@ -48,46 +55,11 @@ Required field:
|
|
48
55
|
|
49
56
|
Optional fields:
|
50
57
|
|
51
|
-
'
|
52
|
-
Local overrides for truth. Contains
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
'config' Truth function config <dict>
|
57
|
-
'index' Truth index <int> or list(<int>)
|
58
|
-
|
59
|
-
'index' indicates which truth fields should be set.
|
60
|
-
0 indicates none, 1 is first element in truth output
|
61
|
-
vector, 2 2nd element, etc.
|
62
|
-
|
63
|
-
Examples:
|
64
|
-
index = 5 truth in class 5, truth(4, 1)
|
65
|
-
index = [1, 5] truth in classes 1 and 5, truth([0, 4], 1)
|
66
|
-
|
67
|
-
In mutually-exclusive mode, a frame is expected to only
|
68
|
-
belong to one class and thus all probabilities must sum to
|
69
|
-
1, and there should be a class for "other" or "none". This
|
70
|
-
is effectively truth for a classifier with multichannel
|
71
|
-
softmax output. SonusAI will automatically calculate class
|
72
|
-
num_classes as 1 - sum(truth(1:num_classes-1). For
|
73
|
-
example, a classifier for (dog, cat) must have
|
74
|
-
num_classes=3 to include "none" in truth(3).
|
75
|
-
|
76
|
-
For multi-label classification each class is an individual
|
77
|
-
probability for that class and any given frame can be
|
78
|
-
assigned to multiple classes/labels, i.e., the classes are
|
79
|
-
not mutually-exclusive. For example, a NN classifier with
|
80
|
-
multichannel sigmoid output. In this case, index could
|
81
|
-
also be a vector with multiple class indices. num_classes
|
82
|
-
should be set to the number of classes/categories.
|
83
|
-
|
84
|
-
'class_balancing_augmentation'
|
85
|
-
Target-specific class balancing augmentation override.
|
86
|
-
This target will not use the global class balancing
|
87
|
-
augmentation rule, but will use this rule instead for
|
88
|
-
class balancing operations. If this rule is specified and
|
89
|
-
empty, then this target will not be used for class
|
90
|
-
balancing.
|
58
|
+
'truth_configs'
|
59
|
+
Local overrides for truth configs. Contains the following:
|
60
|
+
'name' Name of truth config
|
61
|
+
'<param1>' Target-specific override for truth configuration parameter
|
62
|
+
'<param2>' Target-specific override for truth configuration parameter
|
91
63
|
|
92
64
|
'target_level_type'
|
93
65
|
Target-specific override for target_level_type.
|
@@ -96,23 +68,23 @@ Example:
|
|
96
68
|
|
97
69
|
targets:
|
98
70
|
- name: data/esc50/ESC-50-master/audio/1-*.wav
|
99
|
-
|
100
|
-
|
101
|
-
config:
|
71
|
+
truth_configs:
|
72
|
+
sed:
|
102
73
|
thresholds: [-38, -41, -48]
|
103
|
-
|
74
|
+
index: 2
|
75
|
+
class_balancing_augmentation: { }
|
104
76
|
- name: target.mp3
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
index: 5
|
110
|
-
class_balancing_augmentation: { }
|
77
|
+
truth_configs:
|
78
|
+
sed:
|
79
|
+
thresholds: [-37, -40, -46]
|
80
|
+
index: 5
|
111
81
|
""" + default
|
82
|
+
# fmt: on
|
112
83
|
|
113
84
|
|
114
85
|
def doc_num_classes() -> str:
|
115
86
|
default = f"\nDefault value: {get_default_config()['num_classes']}"
|
87
|
+
# fmt: off
|
116
88
|
return """
|
117
89
|
'num_classes' is a mixture database configuration parameter that sets the number of
|
118
90
|
classes in this dataset. The number of classes is the total number of parameters
|
@@ -123,69 +95,38 @@ Note that the model output 'parameters' dimension is NOT necessarily the same si
|
|
123
95
|
as the truth 'num_classes' dimension; there may be multiple truth functions combined
|
124
96
|
in the truth, e.g., for use in loss function calculations.
|
125
97
|
""" + default
|
98
|
+
# fmt: on
|
126
99
|
|
127
100
|
|
128
101
|
def doc_class_labels() -> str:
|
129
102
|
default = f"\nDefault value: {get_default_config()['class_labels']}"
|
103
|
+
# fmt: off
|
130
104
|
return """
|
131
105
|
'class_labels' is a mixture database configuration parameter that sets class labels
|
132
106
|
in this dataset.
|
133
107
|
""" + default
|
108
|
+
# fmt: on
|
134
109
|
|
135
110
|
|
136
|
-
def doc_class_weights_threshold() -> str:
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
def doc_truth_mode() -> str:
|
150
|
-
default = f"\nDefault value: {get_default_config()['truth_mode']}"
|
151
|
-
return """
|
152
|
-
'truth_mode' is a mixture database configuration parameter that sets the truth output
|
153
|
-
mode.
|
154
|
-
|
155
|
-
Supported values:
|
156
|
-
|
157
|
-
normal multi-label (no automatic calculation of other class)
|
158
|
-
Set 'num_classes' to the actual number of classification categories.
|
159
|
-
|
160
|
-
mutex mutually-exclusive
|
161
|
-
Set 'num_classes' to the actual number of classification categories plus 1
|
162
|
-
to include a "none" category which will be set to 1 - truth (where truth is
|
163
|
-
active for only one label) for all frames, to guarantee that the sum of all
|
164
|
-
truth outputs is equal to 1. This is required to support softmax()
|
165
|
-
neural-net outputs for multi-class classification (single label case).
|
166
|
-
""" + default
|
167
|
-
|
168
|
-
|
169
|
-
def doc_truth_reduction_function() -> str:
|
170
|
-
default = f"\nDefault value: {get_default_config()['truth_reduction_function']}"
|
171
|
-
return """
|
172
|
-
'truth_reduction_function' is a mixture database configuration parameter that set the
|
173
|
-
truth reduction function. It is used during feature generation to reduce sample-based
|
174
|
-
truth down to transform frame-based truth. The feature generator further reduces this
|
175
|
-
down to feature frame-based truth (based on stride and decimation).
|
176
|
-
|
177
|
-
Supported values:
|
178
|
-
|
179
|
-
max Returns the max value in a transform frame
|
180
|
-
mean Returns the mean of a transform frame
|
181
|
-
index0 Returns the first value in a transform frame
|
182
|
-
""" + default
|
111
|
+
# def doc_class_weights_threshold() -> str:
|
112
|
+
# default = f"\nDefault value: {get_default_config()['class_weights_threshold']}"
|
113
|
+
# # fmt: off
|
114
|
+
# return """
|
115
|
+
# 'class_weights_threshold' is a mixture database configuration parameter that sets
|
116
|
+
# the threshold for class weights calculation to quantize truth to binary for counting.
|
117
|
+
#
|
118
|
+
# Supports scalar or list:
|
119
|
+
#
|
120
|
+
# scalar use for all classes
|
121
|
+
# list must be of num_classes length
|
122
|
+
# """ + default
|
123
|
+
# # fmt: on
|
183
124
|
|
184
125
|
|
185
126
|
def get_truth_functions() -> str:
|
186
127
|
from sonusai.mixture import truth_functions
|
187
128
|
|
188
|
-
functions = [function for function in dir(truth_functions) if not function.startswith(
|
129
|
+
functions = [function for function in dir(truth_functions) if not function.startswith("__")]
|
189
130
|
text = "\nSupported truth functions:\n\n"
|
190
131
|
for function in functions:
|
191
132
|
docs = getattr(truth_functions, function).__doc__
|
@@ -196,65 +137,67 @@ def get_truth_functions() -> str:
|
|
196
137
|
return text
|
197
138
|
|
198
139
|
|
199
|
-
def
|
140
|
+
def doc_truth_configs() -> str:
|
200
141
|
import yaml
|
201
142
|
|
202
|
-
default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['
|
143
|
+
default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['truth_configs'])}"
|
144
|
+
# fmt: off
|
203
145
|
return """
|
204
|
-
'
|
205
|
-
generation
|
206
|
-
target-specific '
|
146
|
+
'truth_configs' is a mixture database configuration parameter that sets the truth
|
147
|
+
generation configurations for targets. There is a global 'truth_configs' and there may be
|
148
|
+
target-specific 'truth_configs'.
|
207
149
|
|
208
|
-
A truth
|
150
|
+
A truth config creates a type of truth and is associated with target file(s).
|
209
151
|
Target files may have multiple truth settings.
|
210
|
-
|
211
|
-
|
152
|
+
|
153
|
+
Truth is always generated per transform frame.
|
212
154
|
|
213
155
|
Note that there is a difference between transform frames and feature frames: a feature
|
214
|
-
frame
|
215
|
-
frames in a single feature).
|
156
|
+
frame may be decimated and may have a stride dimension greater than 1 (which aggregates
|
157
|
+
multiple transform frames in a single feature).
|
216
158
|
|
217
159
|
There are two notions of truth data: truth_t and truth_f. truth_t is what the truth
|
218
|
-
functions always generate and
|
219
|
-
domain, is created
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
should be set to the number of classes/categories.
|
160
|
+
functions always generate and is in the transform frame domain [transform_frames, truth_parameters].
|
161
|
+
truth_f, or truth in the feature domain, is created by passing truth_t into the feature
|
162
|
+
generator which produces feature frame domain truth data [feature_frames, stride or 1, truth_parameters].
|
163
|
+
|
164
|
+
The stride dimension may be reduced using the 'stride_reduction' parameter. Supported stride
|
165
|
+
reduction methods:
|
166
|
+
'none' preserve the stride dimension with no change
|
167
|
+
'max' reduce the stride dimension to 1 by taking the max
|
168
|
+
'mean' reduce the stride dimension to 1 by taking the mean
|
169
|
+
'first' reduce the stride dimension to 1 by taking the value in the first stride index
|
170
|
+
|
171
|
+
The 'truth_configs' parameter specifies the following:
|
172
|
+
|
173
|
+
'name' Name of truth configuration
|
174
|
+
'function' Name of truth function to use
|
175
|
+
'stride_reduction'
|
176
|
+
Name of stride reduction method to use
|
177
|
+
'<param1>' Function-specific configuration parameter
|
178
|
+
'<paramN>' Function-specific configuration parameter
|
179
|
+
'class_balancing_augmentation'
|
180
|
+
Class balancing augmentation.
|
181
|
+
This truth configuration will use this rule for class balancing operations.
|
182
|
+
If this rule is empty or unspecified, then this truth function will not
|
183
|
+
perform class balancing.
|
184
|
+
|
185
|
+
Class balancing ensures that each class in a sound classification dataset
|
186
|
+
is represented equally (i.e., each class has the same number of augmented
|
187
|
+
targets). This is achieved by creating new class balancing augmentation
|
188
|
+
rules and applying them to targets in underrepresented classes to create
|
189
|
+
more augmented targets for those classes.
|
190
|
+
|
191
|
+
This rule must contain at least one random entry in order to guarantee
|
192
|
+
unique additional data.
|
193
|
+
|
194
|
+
See 'augmentations' for details on augmentation rules.
|
254
195
|
""" + get_truth_functions() + default
|
196
|
+
# fmt: on
|
255
197
|
|
256
198
|
|
257
199
|
def doc_augmentations() -> str:
|
200
|
+
# fmt: off
|
258
201
|
return """
|
259
202
|
Augmentation Rules
|
260
203
|
|
@@ -369,50 +312,26 @@ This rule expands to 6 unique augmentations being applied to each target
|
|
369
312
|
tempo: 0.9, eq1: ["rand(100, 7500)", 0.8, 10]
|
370
313
|
tempo: 1.0, eq1: ["rand(100, 7500)", 0.8, 10]
|
371
314
|
tempo: 1.1, eq1: ["rand(100, 7500)", 0.8, 10]"""
|
315
|
+
# fmt: on
|
372
316
|
|
373
317
|
|
374
318
|
def doc_target_augmentations() -> str:
|
375
319
|
import yaml
|
376
320
|
|
377
321
|
default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['target_augmentations'])}"
|
322
|
+
# fmt: off
|
378
323
|
return """
|
379
324
|
'target_augmentations' is a mixture database configuration parameter that
|
380
325
|
specifies a list of augmentation rules to use for each target.
|
381
326
|
|
382
327
|
See 'augmentations' for details on augmentation rules.
|
383
328
|
""" + default
|
384
|
-
|
385
|
-
|
386
|
-
def doc_class_balancing_augmentation() -> str:
|
387
|
-
import yaml
|
388
|
-
|
389
|
-
default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['class_balancing_augmentation'])}"
|
390
|
-
return """
|
391
|
-
'class_balancing_augmentation' is a mixture database configuration parameter
|
392
|
-
that sets the default augmentation rule to use for generating class balancing
|
393
|
-
target data. This rule must contain at least one random entry in order to
|
394
|
-
guarantee unique additional data.
|
395
|
-
|
396
|
-
See 'augmentations' for details on augmentation rules.
|
397
|
-
""" + default
|
398
|
-
|
399
|
-
|
400
|
-
def doc_class_balancing() -> str:
|
401
|
-
default = f"\nDefault value: {get_default_config()['class_balancing']}"
|
402
|
-
return """
|
403
|
-
'class_balancing' is a mixture database configuration parameter that
|
404
|
-
enables/disables class balancing.
|
405
|
-
|
406
|
-
Class balancing ensures that each class in a sound classification dataset is
|
407
|
-
represented equally (i.e., each class has the same number of augmented targets).
|
408
|
-
This is achieved by creating new class balancing augmentation rules and applying
|
409
|
-
them to targets in underrepresented classes to create more augmented targets
|
410
|
-
for those classes.
|
411
|
-
""" + default
|
329
|
+
# fmt: on
|
412
330
|
|
413
331
|
|
414
332
|
def doc_noises() -> str:
|
415
333
|
default = f"\nDefault value: {get_default_config()['class_balancing']}"
|
334
|
+
# fmt: off
|
416
335
|
return """
|
417
336
|
'noises' is a mixture database configuration parameter that sets the list of
|
418
337
|
noises to use.
|
@@ -428,22 +347,27 @@ Required field:
|
|
428
347
|
.txt Each line in the given text file indicates an item which
|
429
348
|
may be anything in this list (audio, glob, .yml, or .txt)
|
430
349
|
""" + default
|
350
|
+
# fmt: on
|
431
351
|
|
432
352
|
|
433
353
|
def doc_noise_augmentations() -> str:
|
434
354
|
import yaml
|
435
355
|
|
436
356
|
default = f"\nDefault value:\n\n{yaml.dump(get_default_config()['noise_augmentations'])}"
|
357
|
+
|
358
|
+
# fmt: off
|
437
359
|
return """
|
438
360
|
'noise_augmentations' is a mixture database configuration parameter that
|
439
361
|
specifies a list of augmentation rules to use for each noise.
|
440
362
|
|
441
363
|
See 'augmentations' for details on augmentation rules.
|
442
364
|
""" + default
|
365
|
+
# fmt: on
|
443
366
|
|
444
367
|
|
445
368
|
def doc_snrs() -> str:
|
446
369
|
default = f"\nDefault value: {get_default_config()['snrs']}"
|
370
|
+
# fmt: off
|
447
371
|
return """
|
448
372
|
'snrs' is a mixture database configuration parameter that specifies a list
|
449
373
|
of required signal-to-noise ratios (in dB).
|
@@ -457,10 +381,12 @@ Special values:
|
|
457
381
|
-99 Noise only mixture (no target)
|
458
382
|
99 Target only mixture (no noise)
|
459
383
|
""" + default
|
384
|
+
# fmt: on
|
460
385
|
|
461
386
|
|
462
387
|
def doc_random_snrs() -> str:
|
463
388
|
default = f"\nDefault value: {get_default_config()['random_snrs']}"
|
389
|
+
# fmt: off
|
464
390
|
return """
|
465
391
|
'random_snrs' is a mixture database configuration parameter that specifies a
|
466
392
|
list of random signal-to-noise ratios. The value(s) must be specified as
|
@@ -473,10 +399,12 @@ to achieve the desired SNR. However, unlike ordered SNRs, the desired SNR is
|
|
473
399
|
randomized (per the given rule(s)) for each mixture, i.e., previous random
|
474
400
|
SNRs are not saved and reused.
|
475
401
|
""" + default
|
402
|
+
# fmt: on
|
476
403
|
|
477
404
|
|
478
405
|
def doc_noise_mix_mode() -> str:
|
479
406
|
default = f"\nDefault value: {get_default_config()['noise_mix_mode']}"
|
407
|
+
# fmt: off
|
480
408
|
return """
|
481
409
|
'noise_mix_mode' is a mixture database configuration parameter that sets
|
482
410
|
how to mix noises with targets.
|
@@ -492,20 +420,24 @@ Supported modes:
|
|
492
420
|
and loops back to the beginning if the end of a
|
493
421
|
noise/augmentation is reached.
|
494
422
|
""" + default
|
423
|
+
# fmt: on
|
495
424
|
|
496
425
|
|
497
426
|
def doc_impulse_responses() -> str:
|
498
427
|
default = f"\nDefault value: {get_default_config()['impulse_responses']}"
|
428
|
+
# fmt: off
|
499
429
|
return """
|
500
430
|
'impulse_responses' is a mixture database configuration parameter that specifies a
|
501
431
|
list of impulse response files to use.
|
502
432
|
|
503
433
|
See 'augmentations' for details.
|
504
434
|
""" + default
|
435
|
+
# fmt: on
|
505
436
|
|
506
437
|
|
507
438
|
def doc_spectral_masks() -> str:
|
508
439
|
default = f"\nDefault value: {get_default_config()['spectral_masks']}"
|
440
|
+
# fmt: off
|
509
441
|
return """
|
510
442
|
'spectral_masks' is a mixture database configuration parameter that specifies
|
511
443
|
a list of spectral mask rules.
|
@@ -521,16 +453,17 @@ Rules must specify all the following parameters:
|
|
521
453
|
't_num' Number of time masks to apply (set to 0 to apply none)
|
522
454
|
't_max_percent' Upper bound on the width of the time mask in percent
|
523
455
|
""" + default
|
456
|
+
# fmt: on
|
524
457
|
|
525
458
|
|
526
459
|
def doc_config() -> str:
|
527
460
|
from sonusai.mixture import VALID_CONFIGS
|
528
461
|
|
529
|
-
text =
|
530
|
-
text +=
|
531
|
-
text +=
|
462
|
+
text = "\n"
|
463
|
+
text += "The SonusAI database is defined using a config.yml file.\n\n"
|
464
|
+
text += "See the following for details:\n\n"
|
532
465
|
for c in VALID_CONFIGS:
|
533
|
-
text += f
|
466
|
+
text += f" {c}\n"
|
534
467
|
return text
|
535
468
|
|
536
469
|
|
@@ -539,6 +472,7 @@ def doc_asr_configs() -> str:
|
|
539
472
|
|
540
473
|
default = f"\nDefault value: {get_default_config()['asr_configs']}"
|
541
474
|
engines = get_available_engines()
|
475
|
+
# fmt: off
|
542
476
|
text = """
|
543
477
|
'asr_configs' is a mixture database configuration parameter that sets the list of
|
544
478
|
ASR engine(s) to use.
|
@@ -548,7 +482,7 @@ Required fields:
|
|
548
482
|
'name' Unique identifier for the ASR engine.
|
549
483
|
'engine' ASR engine to use. Available engines:
|
550
484
|
"""
|
551
|
-
text += f
|
485
|
+
text += f" {', '.join(engines)}\n"
|
552
486
|
text += """
|
553
487
|
Optional fields:
|
554
488
|
|
@@ -576,4 +510,5 @@ asr_configs:
|
|
576
510
|
|
577
511
|
Creates two ASR engines for use named faster_tiny_cuda and google.
|
578
512
|
"""
|
513
|
+
# fmt: on
|
579
514
|
return text + default
|
sonusai/doc.py
CHANGED
@@ -20,26 +20,26 @@ def main() -> None:
|
|
20
20
|
|
21
21
|
from sonusai import doc
|
22
22
|
|
23
|
-
topic = args[
|
23
|
+
topic = args["TOPIC"]
|
24
24
|
|
25
|
-
print(f
|
26
|
-
print(
|
25
|
+
print(f"SonusAI {sonusai.__version__} Documentation")
|
26
|
+
print("")
|
27
27
|
|
28
|
-
topics = sorted([item[4:] for item in dir(doc) if item.startswith(
|
28
|
+
topics = sorted([item[4:] for item in dir(doc) if item.startswith("doc_")])
|
29
29
|
|
30
30
|
if topic not in topics:
|
31
31
|
if topic is not None:
|
32
|
-
print(f
|
33
|
-
print(
|
32
|
+
print(f"Unknown topic: {topic}")
|
33
|
+
print("")
|
34
34
|
|
35
|
-
print(
|
35
|
+
print("Available topics:")
|
36
36
|
for item in topics:
|
37
|
-
print(f
|
37
|
+
print(f" {item}")
|
38
38
|
return
|
39
39
|
|
40
|
-
text = getattr(doc,
|
40
|
+
text = getattr(doc, "doc_" + topic)()
|
41
41
|
print(text[1:])
|
42
42
|
|
43
43
|
|
44
|
-
if __name__ ==
|
44
|
+
if __name__ == "__main__":
|
45
45
|
main()
|