sonusai 1.0.16__cp311-abi3-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +170 -0
- sonusai/aawscd_probwrite.py +148 -0
- sonusai/audiofe.py +481 -0
- sonusai/calc_metric_spenh.py +1136 -0
- sonusai/config/__init__.py +0 -0
- sonusai/config/asr.py +21 -0
- sonusai/config/config.py +65 -0
- sonusai/config/config.yml +49 -0
- sonusai/config/constants.py +53 -0
- sonusai/config/ir.py +124 -0
- sonusai/config/ir_delay.py +62 -0
- sonusai/config/source.py +275 -0
- sonusai/config/spectral_masks.py +15 -0
- sonusai/config/truth.py +64 -0
- sonusai/constants.py +14 -0
- sonusai/data/__init__.py +0 -0
- sonusai/data/silero_vad_v5.1.jit +0 -0
- sonusai/data/silero_vad_v5.1.onnx +0 -0
- sonusai/data/speech_ma01_01.wav +0 -0
- sonusai/data/whitenoise.wav +0 -0
- sonusai/datatypes.py +383 -0
- sonusai/deprecated/gentcst.py +632 -0
- sonusai/deprecated/plot.py +519 -0
- sonusai/deprecated/tplot.py +365 -0
- sonusai/doc.py +52 -0
- sonusai/doc_strings/__init__.py +1 -0
- sonusai/doc_strings/doc_strings.py +531 -0
- sonusai/genft.py +196 -0
- sonusai/genmetrics.py +183 -0
- sonusai/genmix.py +199 -0
- sonusai/genmixdb.py +235 -0
- sonusai/ir_metric.py +551 -0
- sonusai/lsdb.py +141 -0
- sonusai/main.py +134 -0
- sonusai/metrics/__init__.py +43 -0
- sonusai/metrics/calc_audio_stats.py +42 -0
- sonusai/metrics/calc_class_weights.py +90 -0
- sonusai/metrics/calc_optimal_thresholds.py +73 -0
- sonusai/metrics/calc_pcm.py +45 -0
- sonusai/metrics/calc_pesq.py +36 -0
- sonusai/metrics/calc_phase_distance.py +43 -0
- sonusai/metrics/calc_sa_sdr.py +64 -0
- sonusai/metrics/calc_sample_weights.py +25 -0
- sonusai/metrics/calc_segsnr_f.py +82 -0
- sonusai/metrics/calc_speech.py +382 -0
- sonusai/metrics/calc_wer.py +71 -0
- sonusai/metrics/calc_wsdr.py +57 -0
- sonusai/metrics/calculate_metrics.py +395 -0
- sonusai/metrics/class_summary.py +74 -0
- sonusai/metrics/confusion_matrix_summary.py +75 -0
- sonusai/metrics/one_hot.py +283 -0
- sonusai/metrics/snr_summary.py +128 -0
- sonusai/metrics_summary.py +314 -0
- sonusai/mixture/__init__.py +15 -0
- sonusai/mixture/audio.py +187 -0
- sonusai/mixture/class_balancing.py +103 -0
- sonusai/mixture/constants.py +3 -0
- sonusai/mixture/data_io.py +173 -0
- sonusai/mixture/db.py +169 -0
- sonusai/mixture/db_datatypes.py +92 -0
- sonusai/mixture/effects.py +344 -0
- sonusai/mixture/feature.py +78 -0
- sonusai/mixture/generation.py +1116 -0
- sonusai/mixture/helpers.py +351 -0
- sonusai/mixture/ir_effects.py +77 -0
- sonusai/mixture/log_duration_and_sizes.py +23 -0
- sonusai/mixture/mixdb.py +1857 -0
- sonusai/mixture/pad_audio.py +35 -0
- sonusai/mixture/resample.py +7 -0
- sonusai/mixture/sox_effects.py +195 -0
- sonusai/mixture/sox_help.py +650 -0
- sonusai/mixture/spectral_mask.py +51 -0
- sonusai/mixture/truth.py +61 -0
- sonusai/mixture/truth_functions/__init__.py +45 -0
- sonusai/mixture/truth_functions/crm.py +105 -0
- sonusai/mixture/truth_functions/energy.py +222 -0
- sonusai/mixture/truth_functions/file.py +48 -0
- sonusai/mixture/truth_functions/metadata.py +24 -0
- sonusai/mixture/truth_functions/metrics.py +28 -0
- sonusai/mixture/truth_functions/phoneme.py +18 -0
- sonusai/mixture/truth_functions/sed.py +98 -0
- sonusai/mixture/truth_functions/target.py +142 -0
- sonusai/mkwav.py +135 -0
- sonusai/onnx_predict.py +363 -0
- sonusai/parse/__init__.py +0 -0
- sonusai/parse/expand.py +156 -0
- sonusai/parse/parse_source_directive.py +129 -0
- sonusai/parse/rand.py +214 -0
- sonusai/py.typed +0 -0
- sonusai/queries/__init__.py +0 -0
- sonusai/queries/queries.py +239 -0
- sonusai/rs.abi3.so +0 -0
- sonusai/rs.pyi +1 -0
- sonusai/rust/__init__.py +0 -0
- sonusai/speech/__init__.py +0 -0
- sonusai/speech/l2arctic.py +121 -0
- sonusai/speech/librispeech.py +102 -0
- sonusai/speech/mcgill.py +71 -0
- sonusai/speech/textgrid.py +89 -0
- sonusai/speech/timit.py +138 -0
- sonusai/speech/types.py +12 -0
- sonusai/speech/vctk.py +53 -0
- sonusai/speech/voxceleb.py +108 -0
- sonusai/utils/__init__.py +3 -0
- sonusai/utils/asl_p56.py +130 -0
- sonusai/utils/asr.py +91 -0
- sonusai/utils/asr_functions/__init__.py +3 -0
- sonusai/utils/asr_functions/aaware_whisper.py +69 -0
- sonusai/utils/audio_devices.py +50 -0
- sonusai/utils/braced_glob.py +50 -0
- sonusai/utils/calculate_input_shape.py +26 -0
- sonusai/utils/choice.py +51 -0
- sonusai/utils/compress.py +25 -0
- sonusai/utils/convert_string_to_number.py +6 -0
- sonusai/utils/create_timestamp.py +5 -0
- sonusai/utils/create_ts_name.py +14 -0
- sonusai/utils/dataclass_from_dict.py +27 -0
- sonusai/utils/db.py +16 -0
- sonusai/utils/docstring.py +53 -0
- sonusai/utils/energy_f.py +44 -0
- sonusai/utils/engineering_number.py +166 -0
- sonusai/utils/evaluate_random_rule.py +15 -0
- sonusai/utils/get_frames_per_batch.py +2 -0
- sonusai/utils/get_label_names.py +20 -0
- sonusai/utils/grouper.py +6 -0
- sonusai/utils/human_readable_size.py +7 -0
- sonusai/utils/keyboard_interrupt.py +12 -0
- sonusai/utils/load_object.py +21 -0
- sonusai/utils/max_text_width.py +9 -0
- sonusai/utils/model_utils.py +28 -0
- sonusai/utils/numeric_conversion.py +11 -0
- sonusai/utils/onnx_utils.py +155 -0
- sonusai/utils/parallel.py +162 -0
- sonusai/utils/path_info.py +7 -0
- sonusai/utils/print_mixture_details.py +60 -0
- sonusai/utils/rand.py +13 -0
- sonusai/utils/ranges.py +43 -0
- sonusai/utils/read_predict_data.py +32 -0
- sonusai/utils/reshape.py +154 -0
- sonusai/utils/seconds_to_hms.py +7 -0
- sonusai/utils/stacked_complex.py +82 -0
- sonusai/utils/stratified_shuffle_split.py +170 -0
- sonusai/utils/tokenized_shell_vars.py +143 -0
- sonusai/utils/write_audio.py +26 -0
- sonusai/utils/yes_or_no.py +8 -0
- sonusai/vars.py +47 -0
- sonusai-1.0.16.dist-info/METADATA +56 -0
- sonusai-1.0.16.dist-info/RECORD +150 -0
- sonusai-1.0.16.dist-info/WHEEL +4 -0
- sonusai-1.0.16.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,531 @@
|
|
1
|
+
from ..config.config import default_config
|
2
|
+
|
3
|
+
|
4
|
+
def doc_seed() -> str:
    """Build the help text for the 'seed' configuration parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    seed_default = default_config()["seed"]
    # fmt: off
    description = """
'seed' is a mixture database configuration parameter that sets the random number
generator seed.
"""
    # fmt: on
    return f"{description}\nDefault value: {seed_default}"
|
12
|
+
|
13
|
+
|
14
|
+
def doc_feature() -> str:
    """Build the help text for the 'feature' configuration parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    feature_default = default_config()["feature"]
    # fmt: off
    description = """
'feature' is a mixture database configuration parameter that sets the feature
to use.
"""
    # fmt: on
    return f"{description}\nDefault value: {feature_default}"
|
22
|
+
|
23
|
+
|
24
|
+
def doc_level_type() -> str:
    """Build the help text for the 'level_type' configuration parameter.

    Returns:
        The parameter description (including the supported values) followed
        by the default value read from ``default_config()``.
    """
    level_type_default = default_config()["level_type"]
    # fmt: off
    description = """
'level_type' is a mixture database configuration parameter that sets the
algorithm to use to determine energy level for SNR calculations.
Supported values are:

default mean of squares
speech ITU-T P.56 active speech level method B
"""
    # fmt: on
    return f"{description}\nDefault value: {level_type_default}"
|
36
|
+
|
37
|
+
|
38
|
+
def doc_sources() -> str:
    """Build the help text for the 'sources' configuration parameter.

    Returns:
        The parameter description, its field reference and an example,
        followed by the default value read from ``default_config()``.
    """
    sources_default = default_config()["sources"]
    # NOTE(review): the example in the text below is keyed 'targets:' while the
    # surrounding text documents 'sources' -- confirm the example is current.
    # fmt: off
    description = """
'sources' is a mixture database configuration parameter that sets the list of
sources to use.

Two sources are required: 'primary' and 'noise'. Additional sources may be
specified with arbitrary names.

Each source has the following fields:

'files' Required list of files to use. Sub-fields:
'name' File name. May be one of the following:
audio Supported formats are .wav, .mp3, .m4a, .aif, .flac, and .ogg
glob Matches file glob patterns
.yml The given YAML file is parsed into the list
.txt Each line in the given text file indicates an item which
may be anything in this list (audio, glob, .yml, or .txt)
'class_indices' Optional list of class indices

'truth_configs' Required list of truth config(s) to use for this source. Sub-fields:
'<name>' Name of truth config. Sub-fields:
'function' Truth function
'stride_reduction' Stride reduction method to use. May be one of: none, max

'level_type'
Source-specific override for level_type.

Example:

targets:
- name: data/esc50/ESC-50-master/audio/1-*.wav
truth_configs:
sed:
thresholds: [-38, -41, -48]
index: 2
class_balancing_effect: { }
- name: target.mp3
truth_configs:
sed:
thresholds: [-37, -40, -46]
index: 5
"""
    # fmt: on
    return f"{description}\nDefault value: {sources_default}"
|
83
|
+
|
84
|
+
|
85
|
+
def doc_num_classes() -> str:
    """Build the help text for the 'num_classes' configuration parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    num_classes_default = default_config()["num_classes"]
    # fmt: off
    description = """
'num_classes' is a mixture database configuration parameter that sets the number of
classes in this dataset. The number of classes is the total number of parameters
(or classes or labels) in the truth. This controls the size of the truth input to
the model.

Note that the model output 'parameters' dimension is NOT necessarily the same size
as the truth 'num_classes' dimension; there may be multiple truth functions combined
in the truth, e.g., for use in loss function calculations.
"""
    # fmt: on
    return f"{description}\nDefault value: {num_classes_default}"
|
99
|
+
|
100
|
+
|
101
|
+
def doc_class_labels() -> str:
    """Build the help text for the 'class_labels' configuration parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    class_labels_default = default_config()["class_labels"]
    # fmt: off
    description = """
'class_labels' is a mixture database configuration parameter that sets class labels
in this dataset.
"""
    # fmt: on
    return f"{description}\nDefault value: {class_labels_default}"
|
109
|
+
|
110
|
+
|
111
|
+
def doc_class_weights_threshold() -> str:
    """Build the help text for the 'class_weights_threshold' parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    threshold_default = default_config()["class_weights_threshold"]
    # fmt: off
    description = """
'class_weights_threshold' is a mixture database configuration parameter that sets
the threshold for class weights calculation to quantize truth to binary for counting.

Supports scalar or list:

scalar use for all classes
list must be of num_classes length
"""
    # fmt: on
    return f"{description}\nDefault value: {threshold_default}"
|
124
|
+
|
125
|
+
|
126
|
+
def get_truth_functions() -> str:
    """Collect the docstrings of the available truth functions.

    Every attribute of the ``truth_functions`` package whose name does not
    start with a double underscore and whose ``__doc__`` is not ``None`` is
    listed by name, followed by the lines of its docstring.

    Returns:
        The assembled listing, starting with a "Supported truth functions"
        heading.
    """
    from ..mixture import truth_functions

    pieces = ["\nSupported truth functions:\n\n"]
    for name in dir(truth_functions):
        if name.startswith("__"):
            continue
        docstring = getattr(truth_functions, name).__doc__
        if docstring is None:
            # Undocumented attributes are left out of the listing.
            continue
        pieces.append(f" {name}\n")
        pieces.extend(f" {line}\n" for line in docstring.splitlines())
    return "".join(pieces)
|
138
|
+
|
139
|
+
|
140
|
+
def doc_truth_configs() -> str:
    """Build the help text for the 'truth_configs' configuration parameter.

    Returns:
        The parameter description, then the listing produced by
        ``get_truth_functions()``, then the YAML dump of the default value
        read from ``default_config()``.
    """
    import yaml

    # Dump the default first so the call order matches the help text layout
    # being independent of the truth function listing.
    default_section = "\nDefault value:\n\n" + yaml.dump(default_config()["truth_configs"])
    # fmt: off
    description = """
'truth_configs' is a mixture database configuration parameter that sets the truth
generation configurations for targets. There is a global 'truth_configs' and there may be
target-specific 'truth_configs'.

A truth config creates a type of truth and is associated with target file(s).
Target files may have multiple truth settings.

Truth is always generated per transform frame.

Note that there is a difference between transform frames and feature frames: a feature
frame may be decimated and may have a stride dimension greater than 1 (which aggregates
multiple transform frames in a single feature).

There are two notions of truth data: truth_t and truth_f. truth_t is what the truth
functions always generate and is in the transform frame domain [transform_frames, truth_parameters].
truth_f, or truth in the feature domain, is created by passing truth_t into the feature
generator which produces feature frame domain truth data [feature_frames, stride or 1, truth_parameters].

The stride dimension may be reduced using the 'stride_reduction' parameter. Supported stride
reduction methods:
'none' preserve the stride dimension with no change
'max' reduce the stride dimension to 1 by taking the max
'mean' reduce the stride dimension to 1 by taking the mean
'first' reduce the stride dimension to 1 by taking the value in the first stride index

The 'truth_configs' parameter specifies the following:

'name' Name of truth configuration
'function' Name of truth function to use
'stride_reduction'
Name of stride reduction method to use
'<param1>' Function-specific configuration parameter
'<paramN>' Function-specific configuration parameter
'class_balancing_effect'
Class balancing effect.
This truth configuration will use this rule for class balancing operations.
If this rule is empty or unspecified, then this truth function will not
perform class balancing.

Class balancing ensures that each class in a sound classification dataset
is represented equally (i.e., each class has the same number of augmented
targets). This is achieved by creating new class balancing effect
rules and applying them to targets in underrepresented classes to create
more effected targets for those classes.

This rule must contain at least one random entry in order to guarantee
unique additional data.

See 'effects' for details on effect rules.
"""
    # fmt: on
    function_listing = get_truth_functions()
    return "".join((description, function_listing, default_section))
|
197
|
+
|
198
|
+
|
199
|
+
def doc_effects() -> str:
    """Return the help text describing effect (augmentation) rules.

    The text is a fixed string: it lists the supported effects, explains
    scalar / list / ``rand`` value syntax, and walks through a four-rule
    example with its expansions.

    Corrections relative to the previous text:
      * the ``'choose(<tag>)'`` entry was missing its closing quote;
      * the second-rule expansion showed pitch values -3 / 3 although the
        rule specifies ``pitch: [-300, 300]``;
      * the fourth rule in the example block used ``tempo: "rand(0.9, 1.1)"``
        while its explanation (and 3 * 2 expansion) uses
        ``tempo: [0.9, 1, 1.1]``; the example now matches the explanation.

    Returns:
        The help text (no default value is appended).
    """
    # fmt: off
    return """
Augmentation Rules

These rules may be specified for target and/or noise. Each rule will be
applied for each target/noise. The values may be specified as scalars, lists,
or random using the syntax: 'rand(<min>, <max>)'.

If a value is specified as a list, then the rule is repeated for each value in
the list.

If a value is specified using rand, then a randomized rule is generated
dynamically per use.

Rules may specify any or all of the following effects:

'normalize' Normalize audio file to the specified level (in dBFS).
'gain' Apply an amplification or an attenuation to the audio signal.
The signal level is adjusted by the given number of dB; positive
amplifies, negative attenuates, 0 does nothing.
'pitch' Change the audio pitch (but not its tempo). Pitch amount is
specified as positive or negative 'cents' (i.e., 100ths of a
semitone).
'tempo' Change the audio tempo (but not its pitch). Tempo amount is
specified as the ratio of the new tempo to the old tempo. For
example, '1.1' speeds up the tempo by 10% and '0.9' slows it
down by 10%.
'eq1' Apply a two-pole peaking equalization filter. EQ parameters are
specified as a [frequency, width, gain] triple where:
'frequency' gives the central frequency in Hz (20 - SR/2),
'width' gives the width as a Q-factor (0.3 - 2.0), and
'gain' gives the gain in dB (-20 - 20).
'eq2' Apply an additional band of EQ. Same as 'eq1'
'eq3' Apply an additional band of EQ. Same as 'eq1'
'lpf' Apply a low-pass Butterworth filter. The 3 dB point frequency is
specified in Hz (20 - SR/2).
'ir' An index into a list of impulse responses (specified in the
'impulse_responses' parameter).
For targets, the impulse response is applied AFTER truth generation
and the resulting audio is still aligned with the truth. Random
syntax for 'ir' is one of the following:
'choose()' chooses a random IR from the entire list
'choose(<min>, <max>)' chooses a random IR in the range <min> to <max>
'choose(<tag>)' chooses a random IR that matches <tag>

Only the specified effects for a given rule are applied; all others are
skipped in the given rule. For example, if a rule only specifies 'tempo',
then only a tempo effect is applied and all other possible effects
are ignored (e.g., 'gain', 'pitch', etc.).

Example:

target_effects:
- normalize: -3.5
- normalize: -3.5
pitch: [-300, 300]
tempo: [0.8, 1.2]
eq1: [[1000, 0.8, 3], [600, 1.0, -4], [800, 0.6, 0]]
- normalize: -3.5
pitch: "rand(-300, 300)"
eq1: ["rand(100, 6000)", "rand(0.6, 1.0)", "rand(-6, 6)"]
lpf: "rand(1000, 8000)"
- tempo: [0.9, 1, 1.1]
eq1: [["rand(100, 7500)", 0.8, -10], ["rand(100, 7500)", 0.8, 10]]

There are four rules given in this example.

The first rule is simple:
- normalize: -3.5

This results in just one effect being applied to each target:

normalize: -3.5

The second rule illustrates the use of lists to specify values:
- normalize: -3.5
pitch: [-300, 300]
tempo: [0.8, 1.2]
eq1: [[1000, 0.8, 3], [600, 1.0, -4], [800, 0.6, 0]]

There are two values given for pitch, two for tempo, and three for EQ. This
rule expands to 2 * 2 * 3 = 12 unique effects being applied to each
target:

normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [ 800, 0.6, 0]

The third rule shows the use of rand:
- normalize: -3.5
pitch: "rand(-300, 300)"
eq1: ["rand(100, 6000)", "rand(0.6, 1.0)", "rand(-6, 6)"]
lpf: "rand(1000, 8000)"

This rule is used to create randomized effects per use.

The fourth rule demonstrates the use of scalars, lists, and rand:
- tempo: [0.9, 1, 1.1]
eq1: [["rand(100, 7500)", 0.8, -10], ["rand(100, 7500)", 0.8, 10]]

This rule expands to 6 unique effects being applied to each target
(list of 3 * list of 2). Here is the expansion:

tempo: 0.9, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 1.0, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 1.1, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 0.9, eq1: ["rand(100, 7500)", 0.8, 10]
tempo: 1.0, eq1: ["rand(100, 7500)", 0.8, 10]
tempo: 1.1, eq1: ["rand(100, 7500)", 0.8, 10]"""
    # fmt: on
|
319
|
+
|
320
|
+
|
321
|
+
def doc_target_effects() -> str:
    """Build the help text for the 'target_effects' configuration parameter.

    Returns:
        The parameter description followed by the YAML dump of the default
        value read from ``default_config()``.
    """
    import yaml

    dumped_default = yaml.dump(default_config()["target_effects"])
    # fmt: off
    description = """
'target_effects' is a mixture database configuration parameter that
specifies a list of effect rules to use for each target.

See 'effects' for details on effect rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{dumped_default}"
|
333
|
+
|
334
|
+
|
335
|
+
def doc_target_distortions() -> str:
    """Build the help text for the 'target_distortions' parameter.

    Returns:
        The parameter description followed by the YAML dump of the default
        value read from ``default_config()``.
    """
    import yaml

    dumped_default = yaml.dump(default_config()["target_distortions"])
    # fmt: off
    description = """
'target_distortions' is a mixture database configuration parameter that
specifies a list of distortion rules to use for each target.

See 'effects' for details on distortion rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{dumped_default}"
|
347
|
+
|
348
|
+
|
349
|
+
def doc_noises() -> str:
    """Build the help text for the 'noises' configuration parameter.

    Returns:
        The parameter description followed by a default value read from
        ``default_config()``.
    """
    # NOTE(review): the default is read from the 'class_balancing' key even
    # though this text documents 'noises' -- confirm which key is intended.
    shown_default = default_config()["class_balancing"]
    # fmt: off
    description = """
'noises' is a mixture database configuration parameter that sets the list of
noises to use.

Required field:

'name'
File name. May be one of the following:

audio Supported formats are .wav, .mp3, .aif, .flac, and .ogg
glob Matches file glob patterns
.yml The given YAML file is parsed into the list
.txt Each line in the given text file indicates an item which
may be anything in this list (audio, glob, .yml, or .txt)
"""
    # fmt: on
    return f"{description}\nDefault value: {shown_default}"
|
368
|
+
|
369
|
+
|
370
|
+
def doc_noise_effects() -> str:
    """Build the help text for the 'noise_effects' configuration parameter.

    Returns:
        The parameter description followed by the YAML dump of the default
        value read from ``default_config()``.
    """
    import yaml

    dumped_default = yaml.dump(default_config()["noise_effects"])
    # fmt: off
    description = """
'noise_effects' is a mixture database configuration parameter that
specifies a list of effect rules to use for each noise.

See 'effects' for details on effect rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{dumped_default}"
|
383
|
+
|
384
|
+
|
385
|
+
def doc_snrs() -> str:
    """Build the help text for the 'snrs' configuration parameter.

    Returns:
        The parameter description (including the special SNR values)
        followed by the default value read from ``default_config()``.
    """
    snrs_default = default_config()["snrs"]
    # fmt: off
    description = """
'snrs' is a mixture database configuration parameter that specifies a list
of required signal-to-noise ratios (in dB).

All other effects are applied to both target and noise and then the
energy levels are measured and the appropriate noise gain calculated to
achieve the desired SNR.

Special values:

-99 Noise only mixture (no target)
99 Target only mixture (no noise)
"""
    # fmt: on
    return f"{description}\nDefault value: {snrs_default}"
|
402
|
+
|
403
|
+
|
404
|
+
def doc_random_snrs() -> str:
    """Build the help text for the 'random_snrs' configuration parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    random_snrs_default = default_config()["random_snrs"]
    # fmt: off
    description = """
'random_snrs' is a mixture database configuration parameter that specifies a
list of random signal-to-noise ratios. The value(s) must be specified as
random using the syntax: 'rand(<min>, <max>)'.

Random SNRs behave slightly differently from regular or ordered SNRs. As with
ordered SNRs, all other effects are applied to both target and noise and
then the energy levels are measured and the appropriate noise gain calculated
to achieve the desired SNR. However, unlike ordered SNRs, the desired SNR is
randomized (per the given rule(s)) for each mixture, i.e., previous random
SNRs are not saved and reused.
"""
    # fmt: on
    return f"{description}\nDefault value: {random_snrs_default}"
|
420
|
+
|
421
|
+
|
422
|
+
def doc_noise_mix_mode() -> str:
    """Build the help text for the 'noise_mix_mode' configuration parameter.

    Returns:
        The parameter description (including the supported modes) followed
        by the default value read from ``default_config()``.
    """
    mix_mode_default = default_config()["noise_mix_mode"]
    # fmt: off
    description = """
'noise_mix_mode' is a mixture database configuration parameter that sets
how to mix noises with targets.

Supported modes:

exhaustive Use every noise/effect with every primary/effect.
non-exhaustive Cycle through every primary/effect without necessarily
using all noise/effect combinations (reduced data set).
non-combinatorial Combine a primary/effect with a single cut of a
noise/effect non-exhaustively (each primary/effect
does not use each noise/effect). Cut has a random start
and loops back to the beginning if the end of a
noise/effect is reached.
"""
    # fmt: on
    return f"{description}\nDefault value: {mix_mode_default}"
|
441
|
+
|
442
|
+
|
443
|
+
def doc_impulse_responses() -> str:
    """Build the help text for the 'impulse_responses' parameter.

    Returns:
        The parameter description followed by the default value read from
        ``default_config()``.
    """
    ir_default = default_config()["impulse_responses"]
    # fmt: off
    description = """
'impulse_responses' is a mixture database configuration parameter that specifies a
list of impulse response files to use.

See 'effects' for details.
"""
    # fmt: on
    return f"{description}\nDefault value: {ir_default}"
|
453
|
+
|
454
|
+
|
455
|
+
def doc_spectral_masks() -> str:
    """Build the help text for the 'spectral_masks' configuration parameter.

    Returns:
        The parameter description (including the rule fields) followed by
        the default value read from ``default_config()``.
    """
    masks_default = default_config()["spectral_masks"]
    # fmt: off
    description = """
'spectral_masks' is a mixture database configuration parameter that specifies
a list of spectral mask rules.

All other effects are applied including SNR and a mixture is generated
and then the spectral mask rules are applied to the resulting mixture feature.

Rules must specify all the following parameters:

'f_max_width' Frequency mask maximum width in bins
'f_num' Number of frequency masks to apply (set to 0 to apply none)
't_max_width' Time mask maximum width in frames
't_num' Number of time masks to apply (set to 0 to apply none)
't_max_percent' Upper bound on the width of the time mask in percent
"""
    # fmt: on
    return f"{description}\nDefault value: {masks_default}"
|
474
|
+
|
475
|
+
|
476
|
+
def doc_config() -> str:
    """Build the top-level help text listing the documented config keys.

    Returns:
        An introduction followed by one line per entry of
        ``VALID_CONFIGS``.
    """
    from ..config.constants import VALID_CONFIGS

    intro = (
        "\n"
        "The SonusAI database is defined using a config.yml file.\n\n"
        "See the following for details:\n\n"
    )
    entries = "".join(f" {name}\n" for name in VALID_CONFIGS)
    return intro + entries
|
485
|
+
|
486
|
+
|
487
|
+
def doc_asr_configs() -> str:
    """Build the help text for the 'asr_configs' configuration parameter.

    The list of available engines is obtained from
    ``get_available_engines()`` and inserted between the required-field and
    optional-field sections.

    Returns:
        The parameter description, field reference and example, followed by
        the default value read from ``default_config()``.
    """
    from ..utils.asr import get_available_engines

    default_section = f"\nDefault value: {default_config()['asr_configs']}"
    engine_line = f" {', '.join(get_available_engines())}\n"
    # fmt: off
    required_section = """
'asr_configs' is a mixture database configuration parameter that sets the list of
ASR engine(s) to use.

Required fields:

'name' Unique identifier for the ASR engine.
'engine' ASR engine to use. Available engines:
"""
    optional_section = """
Optional fields:

'model' Some ASR engines allow the specification of a model, but note most are
very computationally demanding and can overwhelm/hang a local system.
Available whisper ASR engines:
tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large
'device' Some ASR engines allow the specification of a device, either 'cpu' or 'cuda'.
'cpu_threads' Some ASR engines allow the specification of the number of CPU threads to use.
'compute_type' Some ASR engines allow the specification of a compute type, e.g. 'int8'.
'beam_size' Some ASR engines allow the specification of a beam size.
<other> Other parameters can be injected into the ASR engine as needed; all
fields in each config are forwarded to the given engine.

Example:

asr_configs:
- name: faster_tiny_cuda
engine: faster_whisper
model: tiny
device: cuda
beam_size: 5
- name: google
engine: google

Creates two ASR engines for use named faster_tiny_cuda and google.
"""
    # fmt: on
    return "".join((required_section, engine_line, optional_section, default_section))
|