sonusai 0.17.2__tar.gz → 0.17.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.17.2 → sonusai-0.17.3}/PKG-INFO +2 -1
- {sonusai-0.17.2 → sonusai-0.17.3}/pyproject.toml +3 -1
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/audiofe.py +3 -3
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/calc_metric_spenh.py +7 -7
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/mixdb.py +0 -1
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/sox_augmentation.py +3 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mkwav.py +4 -4
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/onnx_predict.py +2 -2
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/post_spenh_targetf.py +2 -2
- sonusai-0.17.2/sonusai/speech/voxceleb2.py → sonusai-0.17.3/sonusai/speech/voxceleb.py +19 -3
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/__init__.py +1 -1
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- sonusai-0.17.2/sonusai/utils/wave.py → sonusai-0.17.3/sonusai/utils/write_audio.py +2 -2
- {sonusai-0.17.2 → sonusai-0.17.3}/README.rst +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/data/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/doc/doc.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/doc.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/genft.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/genmix.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/genmixdb.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/gentcst.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/lsdb.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/main.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/config.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/datatypes.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/generation.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/helpers.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/speaker_metadata.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mkmanifest.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/plot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/queries/queries.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/l2arctic.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/librispeech.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/mcgill.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/textgrid.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/timit.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/types.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/speech/vctk.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/summarize_metric_spenh.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/tplot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asl_p56.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_functions/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_manifest_functions/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_manifest_functions/data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/audio_devices.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/create_timestamp.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/db.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/docstring.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/model_utils.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/onnx_utils.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/path_info.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.17.2 → sonusai-0.17.3}/sonusai/vars.py +0 -0
{sonusai-0.17.2 → sonusai-0.17.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.17.2
+Version: 0.17.3
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -21,6 +21,7 @@ Requires-Dist: h5py (>=3.11.0,<4.0.0)
 Requires-Dist: jiwer (>=3.0.3,<4.0.0)
 Requires-Dist: librosa (>=0.10.1,<0.11.0)
 Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
+Requires-Dist: numpy (>=1.26.4,<2.0.0)
 Requires-Dist: onnx (>=1.14.1,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)

{sonusai-0.17.2 → sonusai-0.17.3}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.17.2"
+version = "0.17.3"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -21,6 +21,7 @@ h5py = "^3.11.0"
 jiwer = "^3.0.3"
 librosa = "^0.10.1"
 matplotlib = "^3.8.0"
+numpy = "^1.26.4"
 onnx = "^1.14.1"
 onnxruntime = "^1.16.1"
 paho-mqtt = "^2.0.0"
@@ -47,6 +48,7 @@ mypy = "^1.6.0"
 mypy-extensions = "^1.0.0"
 pytest = "^8.1.1"
 sonusai-asr-cloud = "^0.1.0"
+sonusai-asr-sensory = "^0.1.0"
 sonusai-torchl = "^0.1.0"
 types-pyyaml = "^6.0.12.12"
 types-requests = "^2.31.0.8"

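The new `numpy = "^1.26.4"` entry is what produces the `Requires-Dist: numpy (>=1.26.4,<2.0.0)` line in PKG-INFO above: Poetry's caret constraint allows any NumPy 1.x release at or above 1.26.4 but excludes NumPy 2.0. The snippet below is an illustrative check of that equivalence using the third-party `packaging` library, which is not a sonusai dependency.

```python
# Illustrative check only: Poetry's "^1.26.4" corresponds to the
# ">=1.26.4,<2.0.0" range published in PKG-INFO.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

numpy_range = SpecifierSet(">=1.26.4,<2.0.0")  # range from PKG-INFO

for candidate in ["1.26.3", "1.26.4", "1.29.0", "2.0.0"]:
    status = "allowed" if Version(candidate) in numpy_range else "excluded"
    print(f"numpy {candidate}: {status}")
# Expected: 1.26.3 excluded, 1.26.4 allowed, 1.29.0 allowed, 2.0.0 excluded
```
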
{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/audiofe.py

@@ -86,7 +86,7 @@ def main() -> None:
 from sonusai.utils import create_timestamp
 from sonusai.utils import get_input_devices
 from sonusai.utils import load_ort_session
-from sonusai.utils import
+from sonusai.utils import write_audio

 ts = create_timestamp()
 capture_name = f'audiofe_capture_{ts}'
@@ -121,7 +121,7 @@ def main() -> None:
 logger.exception(e)
 return
 # Only write if capture from device, not for file input
-
+write_audio(capture_wav, capture_audio, SAMPLE_RATE)
 logger.info('')
 logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
@@ -175,7 +175,7 @@ def main() -> None:
 logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')

 predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
-
+write_audio(predict_wav, predict_audio, SAMPLE_RATE)
 logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
 if debug:
 with h5py.File(h5_name, 'a') as f:

{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/calc_metric_spenh.py

@@ -718,7 +718,7 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
 from sonusai.utils import reshape_outputs
 from sonusai.utils import stack_complex
 from sonusai.utils import unstack_complex
-from sonusai.utils import
+from sonusai.utils import write_audio

 mixdb = MP_GLOBAL.mixdb
 predict_location = MP_GLOBAL.predict_location
@@ -968,12 +968,12 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:

 # 7) write wav files
 if enable_wav:
-
-
-#
-
-
-
+write_audio(name=base_name + '_mixture.wav', audio=float_to_int16(mixture))
+write_audio(name=base_name + '_target.wav', audio=float_to_int16(target))
+# write_audio(name=base_name + '_target_fi.wav', audio=float_to_int16(target_fi))
+write_audio(name=base_name + '_noise.wav', audio=float_to_int16(noise))
+write_audio(name=base_name + '_target_est.wav', audio=float_to_int16(target_est_wav))
+write_audio(name=base_name + '_noise_est.wav', audio=float_to_int16(noise_est_wav))

 # debug code to test for perfect reconstruction of the extraction method
 # note both 75% olsa-hanns and 50% olsa-hann modes checked to have perfect reconstruction

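The calc_metric_spenh change above keeps the existing `float_to_int16(...)` conversion in front of every call and only swaps the writer for `write_audio`. The sketch below shows that convert-then-write pattern in isolation; `to_int16` and `write_wav_int16` are stand-ins built on NumPy and the standard-library `wave` module, not sonusai's actual helpers, and the 16 kHz rate is an assumed value.

```python
# Sketch of the convert-then-write pattern used above; not sonusai code.
import wave

import numpy as np

SAMPLE_RATE = 16000  # assumed example rate


def to_int16(audio: np.ndarray) -> np.ndarray:
    """Scale float samples in [-1, 1] to int16 PCM (stand-in for float_to_int16)."""
    return (np.clip(audio, -1.0, 1.0) * 32767.0).astype(np.int16)


def write_wav_int16(name: str, audio: np.ndarray, sample_rate: int = SAMPLE_RATE) -> None:
    """Write mono int16 PCM samples to a .wav file (stand-in for write_audio)."""
    with wave.open(name, "wb") as f:
        f.setnchannels(1)
        f.setsampwidth(2)  # 2 bytes per sample -> 16-bit PCM
        f.setframerate(sample_rate)
        f.writeframes(audio.tobytes())


mixture = np.random.uniform(-0.5, 0.5, SAMPLE_RATE).astype(np.float32)  # 1 s of noise
write_wav_int16("example_mixture.wav", to_int16(mixture))
```
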
{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/mixdb.py

@@ -10,7 +10,6 @@ from typing import Optional

 from praatio import textgrid
 from praatio.utilities.constants import Interval
-
 from sonusai.mixture.datatypes import AudioF
 from sonusai.mixture.datatypes import AudioT
 from sonusai.mixture.datatypes import AudiosF

{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mixture/sox_augmentation.py

@@ -84,6 +84,7 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
 :return: Augmented audio
 """
 import math
+from pathlib import Path
 import tempfile

 import numpy as np
@@ -124,7 +125,9 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
 except Exception as e:
 raise SonusAIError(f'Error applying IR: {e}')

+path = Path(temp.name)
 temp.close()
+path.unlink()

 # Reset level to previous max value
 tfm = Transformer()

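The sox_augmentation change records the temporary file's path before closing the handle and then unlinks it, so applying an impulse response no longer leaves the temp file behind. Below is a generic sketch of that create/use/close/unlink pattern with `tempfile.NamedTemporaryFile(delete=False)`; the sox processing that sonusai runs between creation and cleanup is omitted.

```python
# Generic sketch of the cleanup pattern added above: keep the temp file around
# for an external tool (delete=False), then remove it explicitly afterwards.
import tempfile
from pathlib import Path

temp = tempfile.NamedTemporaryFile(mode="w+b", suffix=".wav", delete=False)
try:
    temp.write(b"RIFF")  # stand-in for audio handed to the external tool
    temp.flush()
    # ... run the external processor (sox, in sonusai's case) against temp.name ...
finally:
    path = Path(temp.name)  # remember the path before closing the handle
    temp.close()
    path.unlink()           # delete the temp file so it is not leaked
```
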
{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/mkwav.py

@@ -72,7 +72,7 @@ def _process_mixture(mixid: int) -> None:

 from sonusai.mixture import mixture_metadata
 from sonusai.utils import float_to_int16
-from sonusai.utils import
+from sonusai.utils import write_audio

 mixture_filename = join(MP_GLOBAL.mixdb.location, MP_GLOBAL.mixdb.mixtures[mixid].name)
 mixture_basename = splitext(mixture_filename)[0]
@@ -100,11 +100,11 @@ def _process_mixture(mixid: int) -> None:
 if MP_GLOBAL.write_noise:
 noise = np.array(f['noise'])

-
+write_audio(name=mixture_basename + '_mixture.wav', audio=float_to_int16(mixture))
 if MP_GLOBAL.write_target:
-
+write_audio(name=mixture_basename + '_target.wav', audio=float_to_int16(target))
 if MP_GLOBAL.write_noise:
-
+write_audio(name=mixture_basename + '_noise.wav', audio=float_to_int16(noise))

 with open(file=mixture_basename + '.txt', mode='w') as f:
 f.write(mixture_metadata(MP_GLOBAL.mixdb, MP_GLOBAL.mixdb.mixture(mixid)))

{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/onnx_predict.py

@@ -100,7 +100,7 @@ def main() -> None:
 from sonusai.utils import create_ts_name
 from sonusai.utils import load_ort_session
 from sonusai.utils import reshape_inputs
-from sonusai.utils import
+from sonusai.utils import write_audio

 mixdb_path = None
 mixdb = None
@@ -201,7 +201,7 @@ def main() -> None:
 predict = np.transpose(predict, [1, 0, 2])
 predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
 owav_name = splitext(output_fname)[0] + '_predict.wav'
-
+write_audio(owav_name, predict_audio)


 if __name__ == '__main__':

{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/post_spenh_targetf.py

@@ -137,7 +137,7 @@ def _process(file: str) -> None:
 from sonusai.mixture import get_audio_from_transform
 from sonusai.utils import float_to_int16
 from sonusai.utils import unstack_complex
-from sonusai.utils import
+from sonusai.utils import write_audio

 try:
 with h5py.File(file, 'r') as f:
@@ -153,7 +153,7 @@ def _process(file: str) -> None:
 bin_end=MP_GLOBAL.bin_end,
 ttype=MP_GLOBAL.ttype,
 gain=np.float32(1)))
-
+write_audio(name=output_name, audio=float_to_int16(audio))


 if __name__ == '__main__':

sonusai-0.17.2/sonusai/speech/voxceleb2.py → sonusai-0.17.3/sonusai/speech/voxceleb.py

@@ -16,14 +16,30 @@ def load_speakers(input_dir: Path) -> dict:
     import csv

     speakers = {}
+
+    # VoxCeleb1
+    first = True
+    with open(input_dir / 'vox1_meta.csv', newline='') as file:
+        data = csv.reader(file, delimiter='\t')
+        for row in data:
+            if first:
+                first = False
+            else:
+                speakers[row[0].strip()] = {'gender': row[2].strip(),
+                                            'dialect': row[3].strip(),
+                                            'category': row[4].strip()}
+
+    # VoxCeleb2
     first = True
-    with open(input_dir / '
-        data = csv.reader(file)
+    with open(input_dir / 'vox2_meta.csv', newline='') as file:
+        data = csv.reader(file, delimiter='\t')
         for row in data:
             if first:
                 first = False
             else:
-                speakers[row[
+                speakers[row[1].strip()] = {'gender': row[3].strip(),
+                                            'category': row[4].strip()}
+
     return speakers

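With the rename from voxceleb2.py to voxceleb.py, `load_speakers` now merges speaker metadata from both `vox1_meta.csv` and `vox2_meta.csv`, read as tab-separated files; VoxCeleb1 rows carry a `dialect` entry that VoxCeleb2 rows do not. A usage sketch follows; the data directory is a placeholder and the import path is inferred from the rename above.

```python
# Usage sketch based on the hunk above; /data/voxceleb is a placeholder for the
# directory containing vox1_meta.csv and vox2_meta.csv.
from pathlib import Path

from sonusai.speech.voxceleb import load_speakers  # module path inferred from the rename

speakers = load_speakers(Path("/data/voxceleb"))

for speaker_id, meta in list(speakers.items())[:5]:
    # VoxCeleb2 entries have no 'dialect' key, so use .get() with a default
    print(speaker_id, meta["gender"], meta.get("dialect", "n/a"), meta["category"])
```
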
{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/__init__.py

@@ -49,5 +49,5 @@ from .stacked_complex import stacked_complex_imag
 from .stacked_complex import stacked_complex_real
 from .stacked_complex import unstack_complex
 from .stratified_shuffle_split import stratified_shuffle_split_mixid
-from .
+from .write_audio import write_audio
 from .yes_or_no import yes_or_no

{sonusai-0.17.2 → sonusai-0.17.3}/sonusai/utils/asr_functions/aaware_whisper.py

@@ -13,7 +13,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:
 from sonusai import SonusAIError
 from sonusai.utils import ASRResult
 from sonusai.utils import float_to_int16
-from sonusai.utils import
+from sonusai.utils import write_audio

 url = getenv('AAWARE_WHISPER_URL')
 if url is None:
@@ -22,7 +22,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:

 with tempfile.TemporaryDirectory() as tmp:
 file = join(tmp, 'asr.wav')
-
+write_audio(name=file, audio=float_to_int16(data.audio))

 files = {'audio_file': (file, open(file, 'rb'), 'audio/wav')}

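In the ASR helper, the temporary wav that gets uploaded is now written with `write_audio`. The sketch below shows the surrounding temp-directory and multipart-upload pattern in generic form; the fallback URL and the dummy payload are placeholders, not the real Aaware Whisper service or real audio data.

```python
# Generic sketch of the temp-file + multipart-upload pattern used above.
import os
import tempfile
from os.path import join

import requests

url = os.getenv("AAWARE_WHISPER_URL", "http://localhost:9000/asr")  # placeholder fallback

with tempfile.TemporaryDirectory() as tmp:
    file = join(tmp, "asr.wav")
    with open(file, "wb") as f:
        f.write(b"RIFF")  # stand-in payload; the real code writes audio via write_audio()

    try:
        with open(file, "rb") as audio_file:
            files = {"audio_file": (file, audio_file, "audio/wav")}
            response = requests.post(url, files=files)
        print(response.status_code)
    except requests.RequestException as e:
        print(f"upload failed: {e}")
```
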
sonusai-0.17.2/sonusai/utils/wave.py → sonusai-0.17.3/sonusai/utils/write_audio.py

@@ -2,8 +2,8 @@ from sonusai.mixture.constants import SAMPLE_RATE
 from sonusai.mixture.datatypes import AudioT


-def
-""" Write
+def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
+""" Write an audio file.

 To write multiple channels, use a 2D array of shape [channels, samples].
 The bits per sample and PCM/float are determined by the data type.

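The renamed utility keeps a single entry point, `write_audio(name, audio, sample_rate=SAMPLE_RATE)`, with the channel layout and the sample format inferred from the array you pass in, per the docstring above. A usage sketch follows; the int16-versus-float32 behaviour is taken from that docstring, and the 16 kHz figure is only an example value since the real default comes from `sonusai.mixture.constants.SAMPLE_RATE`.

```python
# Usage sketch for the renamed helper, based on the signature and docstring in
# the hunk above. Shapes: 1D for mono, [channels, samples] for multichannel.
import numpy as np

from sonusai.utils import write_audio

samples = 16000  # one second at an assumed 16 kHz rate
mono_int16 = (np.random.uniform(-0.5, 0.5, samples) * 32767).astype(np.int16)
stereo_float = np.random.uniform(-0.5, 0.5, (2, samples)).astype(np.float32)

write_audio("mono_pcm16.wav", mono_int16)                 # int16 -> 16-bit PCM
write_audio("stereo_float.wav", stereo_float)             # float32 -> float samples
write_audio("mono_8k.wav", mono_int16, sample_rate=8000)  # explicit sample rate
```
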
|