sonusai 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +0 -1
- sonusai/audiofe.py +3 -3
- sonusai/calc_metric_spenh.py +81 -52
- sonusai/doc/doc.py +0 -24
- sonusai/genmetrics.py +146 -0
- sonusai/genmixdb.py +0 -2
- sonusai/mixture/__init__.py +0 -1
- sonusai/mixture/constants.py +0 -1
- sonusai/mixture/datatypes.py +2 -9
- sonusai/mixture/generation.py +136 -38
- sonusai/mixture/helpers.py +58 -1
- sonusai/mixture/mapped_snr_f.py +56 -9
- sonusai/mixture/mixdb.py +293 -170
- sonusai/mixture/sox_augmentation.py +3 -0
- sonusai/mixture/tokenized_shell_vars.py +8 -1
- sonusai/mkwav.py +4 -4
- sonusai/onnx_predict.py +2 -2
- sonusai/post_spenh_targetf.py +2 -2
- sonusai/speech/textgrid.py +6 -24
- sonusai/speech/{voxceleb2.py → voxceleb.py} +19 -3
- sonusai/utils/__init__.py +1 -1
- sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- sonusai/utils/{wave.py → write_audio.py} +2 -2
- {sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/METADATA +4 -1
- {sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/RECORD +27 -33
- sonusai/mixture/speaker_metadata.py +0 -35
- sonusai/mkmanifest.py +0 -209
- sonusai/utils/asr_manifest_functions/__init__.py +0 -6
- sonusai/utils/asr_manifest_functions/data.py +0 -1
- sonusai/utils/asr_manifest_functions/librispeech.py +0 -46
- sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -29
- sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -66
- {sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/WHEEL +0 -0
- {sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/entry_points.txt +0 -0
@@ -1,66 +0,0 @@
|
|
1
|
-
from sonusai.utils import PathInfo
|
2
|
-
from sonusai.utils.asr_manifest_functions import TranscriptData
|
3
|
-
|
4
|
-
|
5
|
-
def collect_vctk_noisy_speech_transcripts(paths: list[str] | str) -> TranscriptData:
    """Collect VCTK Noisy Speech transcripts for one or more audio directories.

    For each audio directory, every 'wav' in its base name is replaced with
    'txt', and the sibling directory whose name is a suffix of the result is
    searched for '*.txt' transcript files. Each transcript file must contain
    exactly one line.

    :param paths: A single audio directory path or a list of them
    :return: Mapping from join(<absolute audio directory>, <transcript base name>)
             to the lower-cased, stripped transcript text
    :raises SonusAIError: If no transcript directory is found for a path, a
                          transcript file does not contain exactly one line,
                          or the same key is produced twice
    """
    from glob import escape
    from glob import iglob
    from os import listdir
    from os.path import abspath
    from os.path import basename
    from os.path import isdir
    from os.path import join
    from os.path import split
    from os.path import splitext

    from sonusai import SonusAIError

    entries: TranscriptData = {}
    if not isinstance(paths, list):
        paths = [paths]

    for p in paths:
        abs_p = abspath(p)
        head, tail = split(abs_p)
        tail = tail.replace('wav', 'txt')

        # Only real directories can hold transcripts; a plain file whose name happens
        # to be a suffix of tail would otherwise yield an empty, silently wrong result.
        candidates = [d for d in listdir(head) if tail.endswith(d) and isdir(join(head, d))]
        if not candidates:
            raise SonusAIError(f'Could not find VCTK Noisy Speech transcript data for {p}')

        # listdir() order is arbitrary; prefer the longest (most specific) match so the
        # result is deterministic when several sibling names are suffixes of tail.
        transcript_dir = join(head, max(candidates, key=len))

        # Escape the directory so glob metacharacters in the path are taken literally.
        location = join(escape(transcript_dir), '*.txt')

        for file in iglob(pathname=location):
            with open(file, encoding='utf-8') as f:
                lines = f.readlines()
            if len(lines) != 1:
                raise SonusAIError(f'Ill-formed VCTK Noisy Speech transcript file: {file}')

            # Keys are rooted at the audio directory (not the transcript directory) so
            # they line up with the extension-less absolute audio file paths.
            name = join(abs_p, splitext(basename(file))[0])
            if name in entries:
                raise SonusAIError(f'{name} already exists in transcript data')
            entries[name] = lines[0].lower().strip()

    return entries
|
49
|
-
|
50
|
-
|
51
|
-
def get_vctk_noisy_speech_manifest_entry(entry: PathInfo, transcript_data: TranscriptData) -> dict:
    """Build the ASR manifest entry for a single VCTK Noisy Speech audio file.

    :param entry: Path information for the audio file; its abs_path and
                  audio_filepath attributes are read
    :param transcript_data: Transcript text keyed by the audio file's absolute
                            path with the extension removed
    :return: Dictionary with 'audio_filepath', 'text', and 'duration' keys;
             duration is the value reported by 'soxi -D'
    :raises SonusAIError: If the audio file has no entry in transcript_data
    """
    from os.path import splitext
    from subprocess import check_output

    from sonusai import SonusAIError

    name = splitext(entry.abs_path)[0]
    # Validate the lookup first so no subprocess is spawned for an entry that cannot be used.
    if name not in transcript_data:
        raise SonusAIError(f'Could not find {name} in transcript data')

    # Pass an argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are handed to soxi verbatim and cannot be interpreted by a shell.
    duration = float(check_output(['soxi', '-D', entry.abs_path]))

    return {
        'audio_filepath': entry.audio_filepath,
        'text': transcript_data[name],
        'duration': duration,
    }
|
File without changes
|
File without changes
|