sonusai 1.0.16__cp311-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. sonusai/__init__.py +170 -0
  2. sonusai/aawscd_probwrite.py +148 -0
  3. sonusai/audiofe.py +481 -0
  4. sonusai/calc_metric_spenh.py +1136 -0
  5. sonusai/config/__init__.py +0 -0
  6. sonusai/config/asr.py +21 -0
  7. sonusai/config/config.py +65 -0
  8. sonusai/config/config.yml +49 -0
  9. sonusai/config/constants.py +53 -0
  10. sonusai/config/ir.py +124 -0
  11. sonusai/config/ir_delay.py +62 -0
  12. sonusai/config/source.py +275 -0
  13. sonusai/config/spectral_masks.py +15 -0
  14. sonusai/config/truth.py +64 -0
  15. sonusai/constants.py +14 -0
  16. sonusai/data/__init__.py +0 -0
  17. sonusai/data/silero_vad_v5.1.jit +0 -0
  18. sonusai/data/silero_vad_v5.1.onnx +0 -0
  19. sonusai/data/speech_ma01_01.wav +0 -0
  20. sonusai/data/whitenoise.wav +0 -0
  21. sonusai/datatypes.py +383 -0
  22. sonusai/deprecated/gentcst.py +632 -0
  23. sonusai/deprecated/plot.py +519 -0
  24. sonusai/deprecated/tplot.py +365 -0
  25. sonusai/doc.py +52 -0
  26. sonusai/doc_strings/__init__.py +1 -0
  27. sonusai/doc_strings/doc_strings.py +531 -0
  28. sonusai/genft.py +196 -0
  29. sonusai/genmetrics.py +183 -0
  30. sonusai/genmix.py +199 -0
  31. sonusai/genmixdb.py +235 -0
  32. sonusai/ir_metric.py +551 -0
  33. sonusai/lsdb.py +141 -0
  34. sonusai/main.py +134 -0
  35. sonusai/metrics/__init__.py +43 -0
  36. sonusai/metrics/calc_audio_stats.py +42 -0
  37. sonusai/metrics/calc_class_weights.py +90 -0
  38. sonusai/metrics/calc_optimal_thresholds.py +73 -0
  39. sonusai/metrics/calc_pcm.py +45 -0
  40. sonusai/metrics/calc_pesq.py +36 -0
  41. sonusai/metrics/calc_phase_distance.py +43 -0
  42. sonusai/metrics/calc_sa_sdr.py +64 -0
  43. sonusai/metrics/calc_sample_weights.py +25 -0
  44. sonusai/metrics/calc_segsnr_f.py +82 -0
  45. sonusai/metrics/calc_speech.py +382 -0
  46. sonusai/metrics/calc_wer.py +71 -0
  47. sonusai/metrics/calc_wsdr.py +57 -0
  48. sonusai/metrics/calculate_metrics.py +395 -0
  49. sonusai/metrics/class_summary.py +74 -0
  50. sonusai/metrics/confusion_matrix_summary.py +75 -0
  51. sonusai/metrics/one_hot.py +283 -0
  52. sonusai/metrics/snr_summary.py +128 -0
  53. sonusai/metrics_summary.py +314 -0
  54. sonusai/mixture/__init__.py +15 -0
  55. sonusai/mixture/audio.py +187 -0
  56. sonusai/mixture/class_balancing.py +103 -0
  57. sonusai/mixture/constants.py +3 -0
  58. sonusai/mixture/data_io.py +173 -0
  59. sonusai/mixture/db.py +169 -0
  60. sonusai/mixture/db_datatypes.py +92 -0
  61. sonusai/mixture/effects.py +344 -0
  62. sonusai/mixture/feature.py +78 -0
  63. sonusai/mixture/generation.py +1116 -0
  64. sonusai/mixture/helpers.py +351 -0
  65. sonusai/mixture/ir_effects.py +77 -0
  66. sonusai/mixture/log_duration_and_sizes.py +23 -0
  67. sonusai/mixture/mixdb.py +1857 -0
  68. sonusai/mixture/pad_audio.py +35 -0
  69. sonusai/mixture/resample.py +7 -0
  70. sonusai/mixture/sox_effects.py +195 -0
  71. sonusai/mixture/sox_help.py +650 -0
  72. sonusai/mixture/spectral_mask.py +51 -0
  73. sonusai/mixture/truth.py +61 -0
  74. sonusai/mixture/truth_functions/__init__.py +45 -0
  75. sonusai/mixture/truth_functions/crm.py +105 -0
  76. sonusai/mixture/truth_functions/energy.py +222 -0
  77. sonusai/mixture/truth_functions/file.py +48 -0
  78. sonusai/mixture/truth_functions/metadata.py +24 -0
  79. sonusai/mixture/truth_functions/metrics.py +28 -0
  80. sonusai/mixture/truth_functions/phoneme.py +18 -0
  81. sonusai/mixture/truth_functions/sed.py +98 -0
  82. sonusai/mixture/truth_functions/target.py +142 -0
  83. sonusai/mkwav.py +135 -0
  84. sonusai/onnx_predict.py +363 -0
  85. sonusai/parse/__init__.py +0 -0
  86. sonusai/parse/expand.py +156 -0
  87. sonusai/parse/parse_source_directive.py +129 -0
  88. sonusai/parse/rand.py +214 -0
  89. sonusai/py.typed +0 -0
  90. sonusai/queries/__init__.py +0 -0
  91. sonusai/queries/queries.py +239 -0
  92. sonusai/rs.abi3.so +0 -0
  93. sonusai/rs.pyi +1 -0
  94. sonusai/rust/__init__.py +0 -0
  95. sonusai/speech/__init__.py +0 -0
  96. sonusai/speech/l2arctic.py +121 -0
  97. sonusai/speech/librispeech.py +102 -0
  98. sonusai/speech/mcgill.py +71 -0
  99. sonusai/speech/textgrid.py +89 -0
  100. sonusai/speech/timit.py +138 -0
  101. sonusai/speech/types.py +12 -0
  102. sonusai/speech/vctk.py +53 -0
  103. sonusai/speech/voxceleb.py +108 -0
  104. sonusai/utils/__init__.py +3 -0
  105. sonusai/utils/asl_p56.py +130 -0
  106. sonusai/utils/asr.py +91 -0
  107. sonusai/utils/asr_functions/__init__.py +3 -0
  108. sonusai/utils/asr_functions/aaware_whisper.py +69 -0
  109. sonusai/utils/audio_devices.py +50 -0
  110. sonusai/utils/braced_glob.py +50 -0
  111. sonusai/utils/calculate_input_shape.py +26 -0
  112. sonusai/utils/choice.py +51 -0
  113. sonusai/utils/compress.py +25 -0
  114. sonusai/utils/convert_string_to_number.py +6 -0
  115. sonusai/utils/create_timestamp.py +5 -0
  116. sonusai/utils/create_ts_name.py +14 -0
  117. sonusai/utils/dataclass_from_dict.py +27 -0
  118. sonusai/utils/db.py +16 -0
  119. sonusai/utils/docstring.py +53 -0
  120. sonusai/utils/energy_f.py +44 -0
  121. sonusai/utils/engineering_number.py +166 -0
  122. sonusai/utils/evaluate_random_rule.py +15 -0
  123. sonusai/utils/get_frames_per_batch.py +2 -0
  124. sonusai/utils/get_label_names.py +20 -0
  125. sonusai/utils/grouper.py +6 -0
  126. sonusai/utils/human_readable_size.py +7 -0
  127. sonusai/utils/keyboard_interrupt.py +12 -0
  128. sonusai/utils/load_object.py +21 -0
  129. sonusai/utils/max_text_width.py +9 -0
  130. sonusai/utils/model_utils.py +28 -0
  131. sonusai/utils/numeric_conversion.py +11 -0
  132. sonusai/utils/onnx_utils.py +155 -0
  133. sonusai/utils/parallel.py +162 -0
  134. sonusai/utils/path_info.py +7 -0
  135. sonusai/utils/print_mixture_details.py +60 -0
  136. sonusai/utils/rand.py +13 -0
  137. sonusai/utils/ranges.py +43 -0
  138. sonusai/utils/read_predict_data.py +32 -0
  139. sonusai/utils/reshape.py +154 -0
  140. sonusai/utils/seconds_to_hms.py +7 -0
  141. sonusai/utils/stacked_complex.py +82 -0
  142. sonusai/utils/stratified_shuffle_split.py +170 -0
  143. sonusai/utils/tokenized_shell_vars.py +143 -0
  144. sonusai/utils/write_audio.py +26 -0
  145. sonusai/utils/yes_or_no.py +8 -0
  146. sonusai/vars.py +47 -0
  147. sonusai-1.0.16.dist-info/METADATA +56 -0
  148. sonusai-1.0.16.dist-info/RECORD +150 -0
  149. sonusai-1.0.16.dist-info/WHEEL +4 -0
  150. sonusai-1.0.16.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,531 @@
1
+ from ..config.config import default_config
2
+
3
+
4
def doc_seed() -> str:
    """Return the help text for the 'seed' configuration parameter.

    The text ends with the value stored under 'seed' in the default config.
    """
    seed_default = default_config()["seed"]
    # fmt: off
    description = """
'seed' is a mixture database configuration parameter that sets the random number
generator seed.
"""
    # fmt: on
    return description + f"\nDefault value: {seed_default}"
12
+
13
+
14
def doc_feature() -> str:
    """Return the help text for the 'feature' configuration parameter.

    The text ends with the value stored under 'feature' in the default config.
    """
    feature_default = default_config()["feature"]
    # fmt: off
    description = """
'feature' is a mixture database configuration parameter that sets the feature
to use.
"""
    # fmt: on
    return description + f"\nDefault value: {feature_default}"
22
+
23
+
24
def doc_level_type() -> str:
    """Return the help text for the 'level_type' configuration parameter.

    The text lists the supported values and ends with the value stored under
    'level_type' in the default config.
    """
    level_type_default = default_config()["level_type"]
    # fmt: off
    description = """
'level_type' is a mixture database configuration parameter that sets the
algorithm to use to determine energy level for SNR calculations.
Supported values are:

default mean of squares
speech ITU-T P.56 active speech level method B
"""
    # fmt: on
    return description + f"\nDefault value: {level_type_default}"
36
+
37
+
38
def doc_sources() -> str:
    """Return the help text for the 'sources' configuration parameter.

    The text describes the per-source fields ('files', 'truth_configs',
    'level_type'), shows an example, and ends with the value stored under
    'sources' in the default config.
    """
    default = f"\nDefault value: {default_config()['sources']}"
    # fmt: off
    # The 'stride_reduction' choices are kept in sync with the list given in
    # doc_truth_configs (none, max, mean, first).
    return """
'sources' is a mixture database configuration parameter that sets the list of
sources to use.

Two sources are required: 'primary' and 'noise'. Additional sources may be
specified with arbitrary names.

Each source has the following fields:

'files' Required list of files to use. Sub-fields:
'name' File name. May be one of the following:
audio Supported formats are .wav, .mp3, .m4a, .aif, .flac, and .ogg
glob Matches file glob patterns
.yml The given YAML file is parsed into the list
.txt Each line in the given text file indicates an item which
may be anything in this list (audio, glob, .yml, or .txt)
'class_indices' Optional list of class indices

'truth_configs' Required list of truth config(s) to use for this source. Sub-fields:
'<name>' Name of truth config. Sub-fields:
'function' Truth function
'stride_reduction' Stride reduction method to use. May be one of: none, max, mean, first

'level_type'
Source-specific override for level_type.

Example:

targets:
- name: data/esc50/ESC-50-master/audio/1-*.wav
truth_configs:
sed:
thresholds: [-38, -41, -48]
index: 2
class_balancing_effect: { }
- name: target.mp3
truth_configs:
sed:
thresholds: [-37, -40, -46]
index: 5
""" + default
    # fmt: on
83
+
84
+
85
def doc_num_classes() -> str:
    """Return the help text for the 'num_classes' configuration parameter.

    The text ends with the value stored under 'num_classes' in the default
    config.
    """
    num_classes_default = default_config()["num_classes"]
    # fmt: off
    description = """
'num_classes' is a mixture database configuration parameter that sets the number of
classes in this dataset. The number of classes is the total number of parameters
(or classes or labels) in the truth. This controls the size of the truth input to
the model.

Note that the model output 'parameters' dimension is NOT necessarily the same size
as the truth 'num_classes' dimension; there may be multiple truth functions combined
in the truth, e.g., for use in loss function calculations.
"""
    # fmt: on
    return description + f"\nDefault value: {num_classes_default}"
99
+
100
+
101
def doc_class_labels() -> str:
    """Return the help text for the 'class_labels' configuration parameter.

    The text ends with the value stored under 'class_labels' in the default
    config.
    """
    class_labels_default = default_config()["class_labels"]
    # fmt: off
    description = """
'class_labels' is a mixture database configuration parameter that sets class labels
in this dataset.
"""
    # fmt: on
    return description + f"\nDefault value: {class_labels_default}"
109
+
110
+
111
def doc_class_weights_threshold() -> str:
    """Return the help text for the 'class_weights_threshold' parameter.

    The text ends with the value stored under 'class_weights_threshold' in the
    default config.
    """
    threshold_default = default_config()["class_weights_threshold"]
    # fmt: off
    description = """
'class_weights_threshold' is a mixture database configuration parameter that sets
the threshold for class weights calculation to quantize truth to binary for counting.

Supports scalar or list:

scalar use for all classes
list must be of num_classes length
"""
    # fmt: on
    return description + f"\nDefault value: {threshold_default}"
124
+
125
+
126
def get_truth_functions() -> str:
    """Return a listing of the documented names in the truth_functions package.

    Every attribute of ``truth_functions`` whose name does not start with a
    double underscore and whose ``__doc__`` is not None contributes its name
    followed by each line of its docstring.
    """
    from ..mixture import truth_functions

    pieces = ["\nSupported truth functions:\n\n"]
    for name in dir(truth_functions):
        if name.startswith("__"):
            continue
        doc_text = getattr(truth_functions, name).__doc__
        if doc_text is None:
            # Undocumented attributes are left out of the listing.
            continue
        pieces.append(f" {name}\n")
        pieces.extend(f" {line}\n" for line in doc_text.splitlines())
    return "".join(pieces)
138
+
139
+
140
def doc_truth_configs() -> str:
    """Return the help text for the 'truth_configs' configuration parameter.

    The text is followed by the output of get_truth_functions() and then by a
    YAML dump of the value stored under 'truth_configs' in the default config.
    """
    import yaml

    default_yaml = yaml.dump(default_config()["truth_configs"])
    # fmt: off
    description = """
'truth_configs' is a mixture database configuration parameter that sets the truth
generation configurations for targets. There is a global 'truth_configs' and there may be
target-specific 'truth_configs'.

A truth config creates a type of truth and is associated with target file(s).
Target files may have multiple truth settings.

Truth is always generated per transform frame.

Note that there is a difference between transform frames and feature frames: a feature
frame may be decimated and may have a stride dimension greater than 1 (which aggregates
multiple transform frames in a single feature).

There are two notions of truth data: truth_t and truth_f. truth_t is what the truth
functions always generate and is in the transform frame domain [transform_frames, truth_parameters].
truth_f, or truth in the feature domain, is created by passing truth_t into the feature
generator which produces feature frame domain truth data [feature_frames, stride or 1, truth_parameters].

The stride dimension may be reduced using the 'stride_reduction' parameter. Supported stride
reduction methods:
'none' preserve the stride dimension with no change
'max' reduce the stride dimension to 1 by taking the max
'mean' reduce the stride dimension to 1 by taking the mean
'first' reduce the stride dimension to 1 by taking the value in the first stride index

The 'truth_configs' parameter specifies the following:

'name' Name of truth configuration
'function' Name of truth function to use
'stride_reduction'
Name of stride reduction method to use
'<param1>' Function-specific configuration parameter
'<paramN>' Function-specific configuration parameter
'class_balancing_effect'
Class balancing effect.
This truth configuration will use this rule for class balancing operations.
If this rule is empty or unspecified, then this truth function will not
perform class balancing.

Class balancing ensures that each class in a sound classification dataset
is represented equally (i.e., each class has the same number of augmented
targets). This is achieved by creating new class balancing effect
rules and applying them to targets in underrepresented classes to create
more effected targets for those classes.

This rule must contain at least one random entry in order to guarantee
unique additional data.

See 'effects' for details on effect rules.
"""
    # fmt: on
    return "".join((description, get_truth_functions(), "\nDefault value:\n\n", default_yaml))
197
+
198
+
199
def doc_effects() -> str:
    """Return the help text describing effect (augmentation) rules.

    The text is a fixed literal: it lists the supported effects, explains
    scalar, list and 'rand(<min>, <max>)' values, and walks through a
    four-rule example with the expansion of each rule.
    """
    # fmt: off
    # The worked example is kept consistent with its explanation: pitch values
    # in the rule-2 expansion match the rule's [-300, 300] list, and the fourth
    # rule in the example block is the same rule that is expanded below it.
    return """
Augmentation Rules

These rules may be specified for target and/or noise. Each rule will be
applied for each target/noise. The values may be specified as scalars, lists,
or random using the syntax: 'rand(<min>, <max>)'.

If a value is specified as a list, then the rule is repeated for each value in
the list.

If a value is specified using rand, then a randomized rule is generated
dynamically per use.

Rules may specify any or all of the following effects:

'normalize' Normalize audio file to the specified level (in dBFS).
'gain' Apply an amplification or an attenuation to the audio signal.
The signal level is adjusted by the given number of dB; positive
amplifies, negative attenuates, 0 does nothing.
'pitch' Change the audio pitch (but not its tempo). Pitch amount is
specified as positive or negative 'cents' (i.e., 100ths of a
semitone).
'tempo' Change the audio tempo (but not its pitch). Tempo amount is
specified as the ratio of the new tempo to the old tempo. For
example, '1.1' speeds up the tempo by 10% and '0.9' slows it
down by 10%.
'eq1' Apply a two-pole peaking equalization filter. EQ parameters are
specified as a [frequency, width, gain] triple where:
'frequency' gives the central frequency in Hz (20 - SR/2),
'width' gives the width as a Q-factor (0.3 - 2.0), and
'gain' gives the gain in dB (-20 - 20).
'eq2' Apply an additional band of EQ. Same as 'eq1'
'eq3' Apply an additional band of EQ. Same as 'eq1'
'lpf' Apply a low-pass Butterworth filter. The 3 dB point frequency is
specified in Hz (20 - SR/2).
'ir' An index into a list of impulse responses (specified in the
'impulse_responses' parameter).
For targets, the impulse response is applied AFTER truth generation
and the resulting audio is still aligned with the truth. Random
syntax for 'ir' is one of the following:
'choose()' chooses a random IR from the entire list
'choose(<min>, <max>)' chooses a random IR in the range <min> to <max>
'choose(<tag>)' chooses a random IR that matches <tag>

Only the specified effects for a given rule are applied; all others are
skipped in the given rule. For example, if a rule only specifies 'tempo',
then only a tempo effect is applied and all other possible effects
are ignored (e.g., 'gain', 'pitch', etc.).

Example:

target_effects:
- normalize: -3.5
- normalize: -3.5
pitch: [-300, 300]
tempo: [0.8, 1.2]
eq1: [[1000, 0.8, 3], [600, 1.0, -4], [800, 0.6, 0]]
- normalize: -3.5
pitch: "rand(-300, 300)"
eq1: ["rand(100, 6000)", "rand(0.6, 1.0)", "rand(-6, 6)"]
lpf: "rand(1000, 8000)"
- tempo: [0.9, 1, 1.1]
eq1: [["rand(100, 7500)", 0.8, -10], ["rand(100, 7500)", 0.8, 10]]

There are four rules given in this example.

The first rule is simple:
- normalize: -3.5

This results in just one effect being applied to each target:

normalize: -3.5

The second rule illustrates the use of lists to specify values:
- normalize: -3.5
pitch: [-300, 300]
tempo: [0.8, 1.2]
eq1: [[1000, 0.8, 3], [600, 1.0, -4], [800, 0.6, 0]]

There are two values given for pitch, two for tempo, and three for EQ. This
rule expands to 2 * 2 * 3 = 12 unique effects being applied to each
target:

normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: -300, tempo: 0.8, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: -300, tempo: 1.2, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: 300, tempo: 0.8, eq1: [ 800, 0.6, 0]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [1000, 0.8, 3]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [ 600, 1.0, -4]
normalize: -3.5, pitch: 300, tempo: 1.2, eq1: [ 800, 0.6, 0]

The third rule shows the use of rand:
- normalize: -3.5
pitch: "rand(-300, 300)"
eq1: ["rand(100, 6000)", "rand(0.6, 1.0)", "rand(-6, 6)"]
lpf: "rand(1000, 8000)"

This rule is used to create randomized effects per use.

The fourth rule demonstrates the use of scalars, lists, and rand:
- tempo: [0.9, 1, 1.1]
eq1: [["rand(100, 7500)", 0.8, -10], ["rand(100, 7500)", 0.8, 10]]

This rule expands to 6 unique effects being applied to each target
(list of 3 * list of 2). Here is the expansion:

tempo: 0.9, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 1.0, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 1.1, eq1: ["rand(100, 7500)", 0.8, -10]
tempo: 0.9, eq1: ["rand(100, 7500)", 0.8, 10]
tempo: 1.0, eq1: ["rand(100, 7500)", 0.8, 10]
tempo: 1.1, eq1: ["rand(100, 7500)", 0.8, 10]"""
    # fmt: on
319
+
320
+
321
def doc_target_effects() -> str:
    """Return the help text for the 'target_effects' configuration parameter.

    The text ends with a YAML dump of the value stored under 'target_effects'
    in the default config.
    """
    import yaml

    default_yaml = yaml.dump(default_config()["target_effects"])
    # fmt: off
    description = """
'target_effects' is a mixture database configuration parameter that
specifies a list of effect rules to use for each target.

See 'effects' for details on effect rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{default_yaml}"
333
+
334
+
335
def doc_target_distortions() -> str:
    """Return the help text for the 'target_distortions' configuration parameter.

    The text ends with a YAML dump of the value stored under
    'target_distortions' in the default config.
    """
    import yaml

    default_yaml = yaml.dump(default_config()["target_distortions"])
    # fmt: off
    description = """
'target_distortions' is a mixture database configuration parameter that
specifies a list of distortion rules to use for each target.

See 'effects' for details on distortion rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{default_yaml}"
347
+
348
+
349
def doc_noises() -> str:
    """Return the help text for the 'noises' configuration parameter.

    The text ends with the value stored under 'noises' in the default config.
    """
    # Report the default of the parameter being documented; this previously
    # read the unrelated 'class_balancing' entry.
    # NOTE(review): assumes the default config defines a 'noises' key - confirm.
    default = f"\nDefault value: {default_config()['noises']}"
    # fmt: off
    return """
'noises' is a mixture database configuration parameter that sets the list of
noises to use.

Required field:

'name'
File name. May be one of the following:

audio Supported formats are .wav, .mp3, .aif, .flac, and .ogg
glob Matches file glob patterns
.yml The given YAML file is parsed into the list
.txt Each line in the given text file indicates an item which
may be anything in this list (audio, glob, .yml, or .txt)
""" + default
    # fmt: on
368
+
369
+
370
def doc_noise_effects() -> str:
    """Return the help text for the 'noise_effects' configuration parameter.

    The text ends with a YAML dump of the value stored under 'noise_effects'
    in the default config.
    """
    import yaml

    default_yaml = yaml.dump(default_config()["noise_effects"])
    # fmt: off
    description = """
'noise_effects' is a mixture database configuration parameter that
specifies a list of effect rules to use for each noise.

See 'effects' for details on effect rules.
"""
    # fmt: on
    return f"{description}\nDefault value:\n\n{default_yaml}"
383
+
384
+
385
def doc_snrs() -> str:
    """Return the help text for the 'snrs' configuration parameter.

    The text documents the special values -99 and 99 and ends with the value
    stored under 'snrs' in the default config.
    """
    snrs_default = default_config()["snrs"]
    # fmt: off
    description = """
'snrs' is a mixture database configuration parameter that specifies a list
of required signal-to-noise ratios (in dB).

All other effects are applied to both target and noise and then the
energy levels are measured and the appropriate noise gain calculated to
achieve the desired SNR.

Special values:

-99 Noise only mixture (no target)
99 Target only mixture (no noise)
"""
    # fmt: on
    return description + f"\nDefault value: {snrs_default}"
402
+
403
+
404
def doc_random_snrs() -> str:
    """Return the help text for the 'random_snrs' configuration parameter.

    The text ends with the value stored under 'random_snrs' in the default
    config.
    """
    random_snrs_default = default_config()["random_snrs"]
    # fmt: off
    description = """
'random_snrs' is a mixture database configuration parameter that specifies a
list of random signal-to-noise ratios. The value(s) must be specified as
random using the syntax: 'rand(<min>, <max>)'.

Random SNRs behave slightly differently from regular or ordered SNRs. As with
ordered SNRs, all other effects are applied to both target and noise and
then the energy levels are measured and the appropriate noise gain calculated
to achieve the desired SNR. However, unlike ordered SNRs, the desired SNR is
randomized (per the given rule(s)) for each mixture, i.e., previous random
SNRs are not saved and reused.
"""
    # fmt: on
    return description + f"\nDefault value: {random_snrs_default}"
420
+
421
+
422
def doc_noise_mix_mode() -> str:
    """Return the help text for the 'noise_mix_mode' configuration parameter.

    The text lists the supported modes and ends with the value stored under
    'noise_mix_mode' in the default config.
    """
    mix_mode_default = default_config()["noise_mix_mode"]
    # fmt: off
    description = """
'noise_mix_mode' is a mixture database configuration parameter that sets
how to mix noises with targets.

Supported modes:

exhaustive Use every noise/effect with every primary/effect.
non-exhaustive Cycle through every primary/effect without necessarily
using all noise/effect combinations (reduced data set).
non-combinatorial Combine a primary/effect with a single cut of a
noise/effect non-exhaustively (each primary/effect
does not use each noise/effect). Cut has a random start
and loops back to the beginning if the end of a
noise/effect is reached.
"""
    # fmt: on
    return description + f"\nDefault value: {mix_mode_default}"
441
+
442
+
443
def doc_impulse_responses() -> str:
    """Return the help text for the 'impulse_responses' configuration parameter.

    The text ends with the value stored under 'impulse_responses' in the
    default config.
    """
    ir_default = default_config()["impulse_responses"]
    # fmt: off
    description = """
'impulse_responses' is a mixture database configuration parameter that specifies a
list of impulse response files to use.

See 'effects' for details.
"""
    # fmt: on
    return description + f"\nDefault value: {ir_default}"
453
+
454
+
455
def doc_spectral_masks() -> str:
    """Return the help text for the 'spectral_masks' configuration parameter.

    The text lists the parameters every rule must specify and ends with the
    value stored under 'spectral_masks' in the default config.
    """
    masks_default = default_config()["spectral_masks"]
    # fmt: off
    description = """
'spectral_masks' is a mixture database configuration parameter that specifies
a list of spectral mask rules.

All other effects are applied including SNR and a mixture is generated
and then the spectral mask rules are applied to the resulting mixture feature.

Rules must specify all the following parameters:

'f_max_width' Frequency mask maximum width in bins
'f_num' Number of frequency masks to apply (set to 0 to apply none)
't_max_width' Time mask maximum width in frames
't_num' Number of time masks to apply (set to 0 to apply none)
't_max_percent' Upper bound on the width of the time mask in percent
"""
    # fmt: on
    return description + f"\nDefault value: {masks_default}"
474
+
475
+
476
def doc_config() -> str:
    """Return an overview that lists every name in VALID_CONFIGS, one per line."""
    from ..config.constants import VALID_CONFIGS

    header = (
        "\n"
        "The SonusAI database is defined using a config.yml file.\n\n"
        "See the following for details:\n\n"
    )
    return header + "".join(f" {name}\n" for name in VALID_CONFIGS)
485
+
486
+
487
def doc_asr_configs() -> str:
    """Return the help text for the 'asr_configs' configuration parameter.

    The names returned by get_available_engines() are inserted as a
    comma-separated list, and the text ends with the value stored under
    'asr_configs' in the default config.
    """
    from ..utils.asr import get_available_engines

    asr_default = default_config()["asr_configs"]
    engine_names = ", ".join(get_available_engines())
    # fmt: off
    required_fields = """
'asr_configs' is a mixture database configuration parameter that sets the list of
ASR engine(s) to use.

Required fields:

'name' Unique identifier for the ASR engine.
'engine' ASR engine to use. Available engines:
"""
    optional_fields = """
Optional fields:

'model' Some ASR engines allow the specification of a model, but note most are
very computationally demanding and can overwhelm/hang a local system.
Available whisper ASR engines:
tiny.en, tiny, base.en, base, small.en, small, medium.en, medium, large-v1, large-v2, large
'device' Some ASR engines allow the specification of a device, either 'cpu' or 'cuda'.
'cpu_threads' Some ASR engines allow the specification of the number of CPU threads to use.
'compute_type' Some ASR engines allow the specification of a compute type, e.g. 'int8'.
'beam_size' Some ASR engines allow the specification of a beam size.
<other> Other parameters can be injected into the ASR engine as needed; all
fields in each config are forwarded to the given engine.

Example:

asr_configs:
- name: faster_tiny_cuda
engine: faster_whisper
model: tiny
device: cuda
beam_size: 5
- name: google
engine: google

Creates two ASR engines for use named faster_tiny_cuda and google.
"""
    # fmt: on
    return f"{required_fields} {engine_names}\n{optional_fields}\nDefault value: {asr_default}"