sonusai 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonusai/audiofe.py CHANGED
@@ -86,7 +86,7 @@ def main() -> None:
86
86
  from sonusai.utils import create_timestamp
87
87
  from sonusai.utils import get_input_devices
88
88
  from sonusai.utils import load_ort_session
89
- from sonusai.utils import write_wav
89
+ from sonusai.utils import write_audio
90
90
 
91
91
  ts = create_timestamp()
92
92
  capture_name = f'audiofe_capture_{ts}'
@@ -121,7 +121,7 @@ def main() -> None:
121
121
  logger.exception(e)
122
122
  return
123
123
  # Only write if capture from device, not for file input
124
- write_wav(capture_wav, capture_audio, SAMPLE_RATE)
124
+ write_audio(capture_wav, capture_audio, SAMPLE_RATE)
125
125
  logger.info('')
126
126
  logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
127
127
 
@@ -175,7 +175,7 @@ def main() -> None:
175
175
  logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
176
176
 
177
177
  predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
178
- write_wav(predict_wav, predict_audio, SAMPLE_RATE)
178
+ write_audio(predict_wav, predict_audio, SAMPLE_RATE)
179
179
  logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
180
180
  if debug:
181
181
  with h5py.File(h5_name, 'a') as f:
@@ -718,7 +718,7 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
718
718
  from sonusai.utils import reshape_outputs
719
719
  from sonusai.utils import stack_complex
720
720
  from sonusai.utils import unstack_complex
721
- from sonusai.utils import write_wav
721
+ from sonusai.utils import write_audio
722
722
 
723
723
  mixdb = MP_GLOBAL.mixdb
724
724
  predict_location = MP_GLOBAL.predict_location
@@ -968,12 +968,12 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
968
968
 
969
969
  # 7) write wav files
970
970
  if enable_wav:
971
- write_wav(name=base_name + '_mixture.wav', audio=float_to_int16(mixture))
972
- write_wav(name=base_name + '_target.wav', audio=float_to_int16(target))
973
- # write_wav(name=base_name + '_target_fi.wav', audio=float_to_int16(target_fi))
974
- write_wav(name=base_name + '_noise.wav', audio=float_to_int16(noise))
975
- write_wav(name=base_name + '_target_est.wav', audio=float_to_int16(target_est_wav))
976
- write_wav(name=base_name + '_noise_est.wav', audio=float_to_int16(noise_est_wav))
971
+ write_audio(name=base_name + '_mixture.wav', audio=float_to_int16(mixture))
972
+ write_audio(name=base_name + '_target.wav', audio=float_to_int16(target))
973
+ # write_audio(name=base_name + '_target_fi.wav', audio=float_to_int16(target_fi))
974
+ write_audio(name=base_name + '_noise.wav', audio=float_to_int16(noise))
975
+ write_audio(name=base_name + '_target_est.wav', audio=float_to_int16(target_est_wav))
976
+ write_audio(name=base_name + '_noise_est.wav', audio=float_to_int16(noise_est_wav))
977
977
 
978
978
  # debug code to test for perfect reconstruction of the extraction method
979
979
  # note both 75% olsa-hanns and 50% olsa-hann modes checked to have perfect reconstruction
sonusai/mixture/mixdb.py CHANGED
@@ -10,7 +10,6 @@ from typing import Optional
10
10
 
11
11
  from praatio import textgrid
12
12
  from praatio.utilities.constants import Interval
13
-
14
13
  from sonusai.mixture.datatypes import AudioF
15
14
  from sonusai.mixture.datatypes import AudioT
16
15
  from sonusai.mixture.datatypes import AudiosF
@@ -84,6 +84,7 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
84
84
  :return: Augmented audio
85
85
  """
86
86
  import math
87
+ from pathlib import Path
87
88
  import tempfile
88
89
 
89
90
  import numpy as np
@@ -124,7 +125,9 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
124
125
  except Exception as e:
125
126
  raise SonusAIError(f'Error applying IR: {e}')
126
127
 
128
+ path = Path(temp.name)
127
129
  temp.close()
130
+ path.unlink()
128
131
 
129
132
  # Reset level to previous max value
130
133
  tfm = Transformer()
sonusai/mkwav.py CHANGED
@@ -72,7 +72,7 @@ def _process_mixture(mixid: int) -> None:
72
72
 
73
73
  from sonusai.mixture import mixture_metadata
74
74
  from sonusai.utils import float_to_int16
75
- from sonusai.utils import write_wav
75
+ from sonusai.utils import write_audio
76
76
 
77
77
  mixture_filename = join(MP_GLOBAL.mixdb.location, MP_GLOBAL.mixdb.mixtures[mixid].name)
78
78
  mixture_basename = splitext(mixture_filename)[0]
@@ -100,11 +100,11 @@ def _process_mixture(mixid: int) -> None:
100
100
  if MP_GLOBAL.write_noise:
101
101
  noise = np.array(f['noise'])
102
102
 
103
- write_wav(name=mixture_basename + '_mixture.wav', audio=float_to_int16(mixture))
103
+ write_audio(name=mixture_basename + '_mixture.wav', audio=float_to_int16(mixture))
104
104
  if MP_GLOBAL.write_target:
105
- write_wav(name=mixture_basename + '_target.wav', audio=float_to_int16(target))
105
+ write_audio(name=mixture_basename + '_target.wav', audio=float_to_int16(target))
106
106
  if MP_GLOBAL.write_noise:
107
- write_wav(name=mixture_basename + '_noise.wav', audio=float_to_int16(noise))
107
+ write_audio(name=mixture_basename + '_noise.wav', audio=float_to_int16(noise))
108
108
 
109
109
  with open(file=mixture_basename + '.txt', mode='w') as f:
110
110
  f.write(mixture_metadata(MP_GLOBAL.mixdb, MP_GLOBAL.mixdb.mixture(mixid)))
sonusai/onnx_predict.py CHANGED
@@ -100,7 +100,7 @@ def main() -> None:
100
100
  from sonusai.utils import create_ts_name
101
101
  from sonusai.utils import load_ort_session
102
102
  from sonusai.utils import reshape_inputs
103
- from sonusai.utils import write_wav
103
+ from sonusai.utils import write_audio
104
104
 
105
105
  mixdb_path = None
106
106
  mixdb = None
@@ -201,7 +201,7 @@ def main() -> None:
201
201
  predict = np.transpose(predict, [1, 0, 2])
202
202
  predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
203
203
  owav_name = splitext(output_fname)[0] + '_predict.wav'
204
- write_wav(owav_name, predict_audio)
204
+ write_audio(owav_name, predict_audio)
205
205
 
206
206
 
207
207
  if __name__ == '__main__':
@@ -137,7 +137,7 @@ def _process(file: str) -> None:
137
137
  from sonusai.mixture import get_audio_from_transform
138
138
  from sonusai.utils import float_to_int16
139
139
  from sonusai.utils import unstack_complex
140
- from sonusai.utils import write_wav
140
+ from sonusai.utils import write_audio
141
141
 
142
142
  try:
143
143
  with h5py.File(file, 'r') as f:
@@ -153,7 +153,7 @@ def _process(file: str) -> None:
153
153
  bin_end=MP_GLOBAL.bin_end,
154
154
  ttype=MP_GLOBAL.ttype,
155
155
  gain=np.float32(1)))
156
- write_wav(name=output_name, audio=float_to_int16(audio))
156
+ write_audio(name=output_name, audio=float_to_int16(audio))
157
157
 
158
158
 
159
159
  if __name__ == '__main__':
@@ -16,14 +16,30 @@ def load_speakers(input_dir: Path) -> dict:
16
16
  import csv
17
17
 
18
18
  speakers = {}
19
+
20
+ # VoxCeleb1
21
+ first = True
22
+ with open(input_dir / 'vox1_meta.csv', newline='') as file:
23
+ data = csv.reader(file, delimiter='\t')
24
+ for row in data:
25
+ if first:
26
+ first = False
27
+ else:
28
+ speakers[row[0].strip()] = {'gender': row[2].strip(),
29
+ 'dialect': row[3].strip(),
30
+ 'category': row[4].strip()}
31
+
32
+ # VoxCeleb2
19
33
  first = True
20
- with open(input_dir / 'vox2_meta_cleansed.csv', newline='') as file:
21
- data = csv.reader(file)
34
+ with open(input_dir / 'vox2_meta.csv', newline='') as file:
35
+ data = csv.reader(file, delimiter='\t')
22
36
  for row in data:
23
37
  if first:
24
38
  first = False
25
39
  else:
26
- speakers[row[0].strip()] = {'gender': row[2].strip(), 'category': row[3].strip()}
40
+ speakers[row[1].strip()] = {'gender': row[3].strip(),
41
+ 'category': row[4].strip()}
42
+
27
43
  return speakers
28
44
 
29
45
 
sonusai/utils/__init__.py CHANGED
@@ -49,5 +49,5 @@ from .stacked_complex import stacked_complex_imag
49
49
  from .stacked_complex import stacked_complex_real
50
50
  from .stacked_complex import unstack_complex
51
51
  from .stratified_shuffle_split import stratified_shuffle_split_mixid
52
- from .wave import write_wav
52
+ from .write_audio import write_audio
53
53
  from .yes_or_no import yes_or_no
@@ -13,7 +13,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:
13
13
  from sonusai import SonusAIError
14
14
  from sonusai.utils import ASRResult
15
15
  from sonusai.utils import float_to_int16
16
- from sonusai.utils import write_wav
16
+ from sonusai.utils import write_audio
17
17
 
18
18
  url = getenv('AAWARE_WHISPER_URL')
19
19
  if url is None:
@@ -22,7 +22,7 @@ def aaware_whisper(data: ASRData) -> ASRResult:
22
22
 
23
23
  with tempfile.TemporaryDirectory() as tmp:
24
24
  file = join(tmp, 'asr.wav')
25
- write_wav(name=file, audio=float_to_int16(data.audio))
25
+ write_audio(name=file, audio=float_to_int16(data.audio))
26
26
 
27
27
  files = {'audio_file': (file, open(file, 'rb'), 'audio/wav')}
28
28
 
@@ -2,8 +2,8 @@ from sonusai.mixture.constants import SAMPLE_RATE
2
2
  from sonusai.mixture.datatypes import AudioT
3
3
 
4
4
 
5
- def write_wav(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
6
- """ Write a simple, uncompressed WAV file.
5
+ def write_audio(name: str, audio: AudioT, sample_rate: int = SAMPLE_RATE) -> None:
6
+ """ Write an audio file.
7
7
 
8
8
  To write multiple channels, use a 2D array of shape [channels, samples].
9
9
  The bits per sample and PCM/float are determined by the data type.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonusai
3
- Version: 0.17.2
3
+ Version: 0.17.3
4
4
  Summary: Framework for building deep neural network models for sound, speech, and voice AI
5
5
  Home-page: https://aaware.com
6
6
  License: GPL-3.0-only
@@ -21,6 +21,7 @@ Requires-Dist: h5py (>=3.11.0,<4.0.0)
21
21
  Requires-Dist: jiwer (>=3.0.3,<4.0.0)
22
22
  Requires-Dist: librosa (>=0.10.1,<0.11.0)
23
23
  Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
24
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
24
25
  Requires-Dist: onnx (>=1.14.1,<2.0.0)
25
26
  Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
26
27
  Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
@@ -1,7 +1,7 @@
1
1
  sonusai/__init__.py,sha256=vzTFfRB-NeO-Sm3puySDJOybk3ND_Oj6w0EejQPmH1U,2978
2
2
  sonusai/aawscd_probwrite.py,sha256=GukR5owp_0A3DrqSl9fHWULYgclNft4D5OkHIwfxxkc,3698
3
- sonusai/audiofe.py,sha256=AHXV7fQKumkwUSbOS-ZU6Cp1VF88DRtqt7foVbf-Nh8,11148
4
- sonusai/calc_metric_spenh.py,sha256=Xgy9EKbZRPAydjTZbpZjaqLBNkjQPjDmSbfL8PbVSgY,62157
3
+ sonusai/audiofe.py,sha256=zOySiYs5ZZm60eMbA7RjhG6C0Ouhaii3WfL1d0Q8rxg,11154
4
+ sonusai/calc_metric_spenh.py,sha256=0UvJibVjjoHTty3QFh0qzugXe-LvIj2XsMwoaIR44BM,62171
5
5
  sonusai/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  sonusai/data/genmixdb.yml,sha256=-XSs_hUR6wHJVoTPmSewzXL7u61X-xmHY46lNPatxSE,1025
7
7
  sonusai/data/speech_ma01_01.wav,sha256=PK0vMKg-NR6rPE3KouxHGF6PKXnJCr7AwjMqfu98LUA,76644
@@ -41,10 +41,10 @@ sonusai/mixture/generation.py,sha256=miUrc3QOSUNIG6mDkiMCZ6M2ulivUZxlYUAJUOVomWc
41
41
  sonusai/mixture/helpers.py,sha256=GSGSD2KnvOeEIB6IwNTxyaQNjghTSBMB729kUEd_RiM,22403
42
42
  sonusai/mixture/log_duration_and_sizes.py,sha256=baTUpqyM15wA125jo9E3posmVJUe3WlpksyO6v9Jul0,1347
43
43
  sonusai/mixture/mapped_snr_f.py,sha256=mlbYM1t14OXe_Zg4CjpWTuA_Zun4W0O3bSUXeodRBQs,1845
44
- sonusai/mixture/mixdb.py,sha256=PvLeEOLn2n0EfBRe7GuvUQfOmj3SKOrzjUimw2qRHP8,49792
44
+ sonusai/mixture/mixdb.py,sha256=zfBaVOuMKHTDzmwR4hWRpGxFwnV_fJkwLDmn9M1SDds,49791
45
45
  sonusai/mixture/soundfile_audio.py,sha256=mHa5SIXsu_uE0j3DO52GydRJrvWSzU_nII-7YJfQ6Qo,4154
46
46
  sonusai/mixture/sox_audio.py,sha256=HT3kYA9TP5QPCuoOJdUMnGVN-qY6q96DGL8zxuog76o,12277
47
- sonusai/mixture/sox_augmentation.py,sha256=F9tBdNvX2guCn7gRppAFrxRnBtjw9q6qAq2_v_A4hh0,4490
47
+ sonusai/mixture/sox_augmentation.py,sha256=kBWPrsFk0EBi71nLcKt5v0GA34bY7g9D9x0cEamNWbU,4564
48
48
  sonusai/mixture/speaker_metadata.py,sha256=l98avdxLYUsSDZ88xUjfvHnACkbnD0_Dg1aBGDbzS9I,1380
49
49
  sonusai/mixture/spectral_mask.py,sha256=8AkCwhy-PSdP1Uri9miKZP-bXFYnFcH_c9xZCGrHavU,2071
50
50
  sonusai/mixture/target_class_balancing.py,sha256=NTNiKZH0_PWLooeow0l41CjJKK8ZTMVbUqz9ZkaNtWk,4900
@@ -62,10 +62,10 @@ sonusai/mixture/truth_functions/phoneme.py,sha256=stYdlPuNytQK_LLT61OJLfYSqKd-sD
62
62
  sonusai/mixture/truth_functions/sed.py,sha256=8cHjEFjZaH_0hIOHhPmj4AJz2GpEADM6Ys2x4NoiWSY,2469
63
63
  sonusai/mixture/truth_functions/target.py,sha256=KAsjugDRooOA5BRcHVAbZRgV7l8S5CFg7CZ0XtKZaQ0,5764
64
64
  sonusai/mkmanifest.py,sha256=imI8swwPYVzumrUYEL-9JLvun-ez98PtlUBj2b729k8,8682
65
- sonusai/mkwav.py,sha256=kLfC2ZuF-t8P97nqYw2falTZpymxAeXv0YTJCe6nK10,5356
66
- sonusai/onnx_predict.py,sha256=ZhicNEbjxm34edIrUcmuvKkV3NRFQk4LBn1LUCFdPjg,8733
65
+ sonusai/mkwav.py,sha256=zfSyIiQTIK3KV9Ij33jkLhhZIMVYqaROcRQ4S7c4sIo,5364
66
+ sonusai/onnx_predict.py,sha256=jSxhD2oFyGSTHOGCXbW4fRT-k4SqKOboK2JaDO-yWcs,8737
67
67
  sonusai/plot.py,sha256=ERkmxMM3qjcCDm4LGDQY4fRAncCYAzP7uW8iZ7_brcg,17105
68
- sonusai/post_spenh_targetf.py,sha256=xOz5T6WZuyTHmfbtILIY9skgH064Wvi2GF2Bo5L3YMU,4998
68
+ sonusai/post_spenh_targetf.py,sha256=pHaJZtms7aj4r6sgqQnEGVi6Gg8H_V29szigogV1vZ8,5002
69
69
  sonusai/queries/__init__.py,sha256=oKY5JeqZ4Cz7DwCwPc1_ydB8bUs6KaMcWFp_w02TjOs,255
70
70
  sonusai/queries/queries.py,sha256=oV-m9uiLZOwYTK-Wo7Gf8dpGisaoGf6uDsAJAarVqZI,7553
71
71
  sonusai/speech/__init__.py,sha256=SuPcU_K9wQISsZRIzsRNLtEC6cb616l-Jlx3PU-HWMs,113
@@ -76,14 +76,14 @@ sonusai/speech/textgrid.py,sha256=8hB6SdEEXxo6JXVFq8mJ1-ilRbBiRXhaHTQjA-HWg-0,33
76
76
  sonusai/speech/timit.py,sha256=1vWgj6isD3ATOjMJSTjOPLmDkYyB65M5MwYipEmLEvg,4081
77
77
  sonusai/speech/types.py,sha256=4eKVPAktpkIrZ2qoVp2iT45zxTVNocQEGT6O_Zlub_w,214
78
78
  sonusai/speech/vctk.py,sha256=EAMEBAzjZUI6dw15n-yI2oCN-H4tzM9t4aUVlOxpAbo,1540
79
- sonusai/speech/voxceleb2.py,sha256=-u0mtxFm4chFipLgMGZXR5EBDtYTCQoU1_j_wYTGwPY,2158
79
+ sonusai/speech/voxceleb.py,sha256=aJGN0yDb2LFLmCKmRzmUEjpZWQ-QGWw6XWOpy9967AI,2686
80
80
  sonusai/summarize_metric_spenh.py,sha256=OiZe_bhCq5esXNhsOkHDD7g4ssYrpENDHvDVoPzV9iw,1822
81
81
  sonusai/tplot.py,sha256=85T6OPZfxVegHBiSuilFpdgCNMEE0VKAuciNy4rCY5Y,14544
82
- sonusai/utils/__init__.py,sha256=y2Xe72QMNk8LbbjdOUOHiR5eVg32fYrFhinWSuSHi-w,2248
82
+ sonusai/utils/__init__.py,sha256=h7QrOyEBMUMoIBFKZpNwDG8Jg-1uw3bs-qflB3CXxhU,2257
83
83
  sonusai/utils/asl_p56.py,sha256=-bvQpd-jRQVURbkZJpRoyEAq6gTv9Rc3oFDbh5_lcjY,3861
84
84
  sonusai/utils/asr.py,sha256=6y6VYJizHpuQ3MgKbEQ4t2gofO-MW6Ez23oAd6d23IE,2920
85
85
  sonusai/utils/asr_functions/__init__.py,sha256=JyHK67s97bw7QzrlkboWhws4yNytdPatqzLJxfwx-yw,43
86
- sonusai/utils/asr_functions/aaware_whisper.py,sha256=LzO9CZV0wBWkjmCR2nSWN_AW9UJwriAsC1OYSlfVeT8,1981
86
+ sonusai/utils/asr_functions/aaware_whisper.py,sha256=Ew3zb8OfbxEW7q1s-KA7D5eph4SjVSUAJgiLK-vVqhI,1985
87
87
  sonusai/utils/asr_manifest_functions/__init__.py,sha256=jfi9xC5c86F_aMSsI5Xj-pxWGxuQ7fwZ8Wdf4T7kDsA,343
88
88
  sonusai/utils/asr_manifest_functions/data.py,sha256=nO4oT3EQmydwn1pzc-ZM09yz4X2ic-LQuHzGEnJhKe8,32
89
89
  sonusai/utils/asr_manifest_functions/librispeech.py,sha256=_3tGc8qfAUpYJZ0_avpW0vGp7zjdpeqj1HAgXi3TL4Q,1612
@@ -118,10 +118,10 @@ sonusai/utils/reshape.py,sha256=E8Eu6grynaeWwVO6peIR0BF22SrVaJSa1Rkl109lq6Y,5997
118
118
  sonusai/utils/seconds_to_hms.py,sha256=oxLuZhTJJr9swj-fOSOrZJ5vBNM7_BrOMQhX1pYpiv0,260
119
119
  sonusai/utils/stacked_complex.py,sha256=feLhz3GC1ILxBGMHOj3sJK--sidsXKbfwkalwAVwizc,2950
120
120
  sonusai/utils/stratified_shuffle_split.py,sha256=rJNXvBp-GxoKzH3OpL7k0ANSu5xMP2zJ7K1fm_33UzE,7022
121
- sonusai/utils/wave.py,sha256=O4ZXkZ6wjrKGa99wBCdFd8G6bp91MXXDnmGihpaEMh0,856
121
+ sonusai/utils/write_audio.py,sha256=ZsPGExwM86QHLLN2LOWekK2uAqf5pV_1oRW811p0QAI,840
122
122
  sonusai/utils/yes_or_no.py,sha256=eMLXBVH0cEahiXY4W2KNORmwNQ-ba10eRtldh0y4NYg,263
123
123
  sonusai/vars.py,sha256=m2AefF0m5bXWGXpJj8Pi42zWL2ydeEj7bkak3GrtMyM,940
124
- sonusai-0.17.2.dist-info/METADATA,sha256=eZmrmMohaVLBAz3v2lGdBcwGCjnszgDiKcAHI9i_2YE,2483
125
- sonusai-0.17.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
126
- sonusai-0.17.2.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
127
- sonusai-0.17.2.dist-info/RECORD,,
124
+ sonusai-0.17.3.dist-info/METADATA,sha256=Zu3DK9fCT1xiipWOBY4JhUZJ3qm1cVRc5eLAXonDb2s,2522
125
+ sonusai-0.17.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
126
+ sonusai-0.17.3.dist-info/entry_points.txt,sha256=zMNjEphEPO6B3cD1GNpit7z-yA9tUU5-j3W2v-UWstU,92
127
+ sonusai-0.17.3.dist-info/RECORD,,