sonusai 0.17.2__tar.gz → 0.18.0__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- {sonusai-0.17.2 → sonusai-0.18.0}/PKG-INFO +4 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/pyproject.toml +6 -2
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/__init__.py +0 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/audiofe.py +3 -3
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/calc_metric_spenh.py +81 -52
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/doc/doc.py +0 -24
- sonusai-0.18.0/sonusai/genmetrics.py +146 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/genmixdb.py +0 -2
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/__init__.py +0 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/constants.py +0 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/datatypes.py +2 -9
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/generation.py +136 -38
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/helpers.py +58 -1
- sonusai-0.18.0/sonusai/mixture/mapped_snr_f.py +100 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/mixdb.py +293 -170
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/sox_augmentation.py +3 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/tokenized_shell_vars.py +8 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mkwav.py +4 -4
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/onnx_predict.py +2 -2
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/post_spenh_targetf.py +2 -2
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/textgrid.py +6 -24
- sonusai-0.17.2/sonusai/speech/voxceleb2.py → sonusai-0.18.0/sonusai/speech/voxceleb.py +19 -3
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/__init__.py +1 -1
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- sonusai-0.17.2/sonusai/utils/wave.py → sonusai-0.18.0/sonusai/utils/write_audio.py +2 -2
- sonusai-0.17.2/sonusai/mixture/mapped_snr_f.py +0 -53
- sonusai-0.17.2/sonusai/mixture/speaker_metadata.py +0 -35
- sonusai-0.17.2/sonusai/mkmanifest.py +0 -209
- sonusai-0.17.2/sonusai/utils/asr_manifest_functions/__init__.py +0 -6
- sonusai-0.17.2/sonusai/utils/asr_manifest_functions/data.py +0 -1
- sonusai-0.17.2/sonusai/utils/asr_manifest_functions/librispeech.py +0 -46
- sonusai-0.17.2/sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -29
- sonusai-0.17.2/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -66
- {sonusai-0.17.2 → sonusai-0.18.0}/README.rst +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/data/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/doc.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/genft.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/genmix.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/gentcst.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/lsdb.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/main.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/config.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/plot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/queries/queries.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/l2arctic.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/librispeech.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/mcgill.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/timit.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/types.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/speech/vctk.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/summarize_metric_spenh.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/tplot.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/asl_p56.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/asr.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/asr_functions/__init__.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/audio_devices.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/create_timestamp.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/db.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/docstring.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/model_utils.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/onnx_utils.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/path_info.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.17.2 → sonusai-0.18.0}/sonusai/vars.py +0 -0
{sonusai-0.17.2 → sonusai-0.18.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.17.2
+Version: 0.18.0
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -21,12 +21,15 @@ Requires-Dist: h5py (>=3.11.0,<4.0.0)
 Requires-Dist: jiwer (>=3.0.3,<4.0.0)
 Requires-Dist: librosa (>=0.10.1,<0.11.0)
 Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
+Requires-Dist: mgzip (>=0.2.1,<0.3.0)
+Requires-Dist: numpy (>=1.26.4,<2.0.0)
 Requires-Dist: onnx (>=1.14.1,<2.0.0)
 Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
 Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
 Requires-Dist: pandas (>=2.1.1,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
 Requires-Dist: praatio (>=6.2.0,<7.0.0)
+Requires-Dist: psutil (>=5,<6)
 Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
 Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
 Requires-Dist: pydub (>=0.25.1,<0.26.0)
{sonusai-0.17.2 → sonusai-0.18.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.17.2"
+version = "0.18.0"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -21,12 +21,15 @@ h5py = "^3.11.0"
 jiwer = "^3.0.3"
 librosa = "^0.10.1"
 matplotlib = "^3.8.0"
+mgzip = "^0.2.1"
+numpy = "^1.26.4"
 onnx = "^1.14.1"
 onnxruntime = "^1.16.1"
 paho-mqtt = "^2.0.0"
 pandas = "^2.1.1"
 pesq = "^0.0.4"
 praatio = "^6.2.0"
+psutil = "^5"
 pyaaware = "^1.5.7"
 pyaudio = "^0.2.14"
 pydub = "^0.25.1"
@@ -47,7 +50,8 @@ mypy = "^1.6.0"
 mypy-extensions = "^1.0.0"
 pytest = "^8.1.1"
 sonusai-asr-cloud = "^0.1.0"
-sonusai-
+sonusai-asr-sensory = "^0.1.0"
+sonusai-torchl = "^0.3.0"
 types-pyyaml = "^6.0.12.12"
 types-requests = "^2.31.0.8"

{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/__init__.py

@@ -14,7 +14,6 @@ commands_doc = """
     genmixdb     Generate a mixture database
     gentcst      Generate target configuration from a subdirectory tree
     lsdb         List information about a mixture database
-    mkmanifest   Make ASR manifest JSON file
     mkwav        Make WAV files from a mixture database
     onnx_predict Run ONNX predict on a trained model
     plot         Plot mixture data
{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/audiofe.py

@@ -86,7 +86,7 @@ def main() -> None:
     from sonusai.utils import create_timestamp
     from sonusai.utils import get_input_devices
     from sonusai.utils import load_ort_session
-    from sonusai.utils import
+    from sonusai.utils import write_audio

     ts = create_timestamp()
     capture_name = f'audiofe_capture_{ts}'
@@ -121,7 +121,7 @@ def main() -> None:
         logger.exception(e)
         return
     # Only write if capture from device, not for file input
-
+    write_audio(capture_wav, capture_audio, SAMPLE_RATE)
     logger.info('')
     logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_wav}')
@@ -175,7 +175,7 @@ def main() -> None:
     logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')

     predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
-
+    write_audio(predict_wav, predict_audio, SAMPLE_RATE)
     logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_wav}')
     if debug:
         with h5py.File(h5_name, 'a') as f:
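Both call sites above now go through the new `write_audio` helper (the module rename `sonusai/utils/wave.py` → `sonusai/utils/write_audio.py` appears in the file list). Its internals are not part of this diff; a minimal stand-in, assuming the `soundfile` package, could look like:

```python
# Hypothetical stand-in for sonusai.utils.write_audio; the real internals are
# not shown in this diff. Assumes the soundfile package for WAV output.
import numpy as np
import soundfile as sf


def write_audio(name: str, audio: np.ndarray, sample_rate: int = 16000) -> None:
    """Write a mono audio buffer to a 16-bit PCM WAV file."""
    sf.write(name, audio, sample_rate, subtype='PCM_16')
```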
{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/calc_metric_spenh.py

@@ -1,6 +1,6 @@
 """sonusai calc_metric_spenh

-usage: calc_metric_spenh [-hvtpws] [-i MIXID] [-e ASR] [-m MODEL] PLOC TLOC
+usage: calc_metric_spenh [-hvtpws] [-i MIXID] [-e ASR] [-m MODEL] [-n NCPU] PLOC TLOC

 options:
     -h, --help
@@ -10,6 +10,7 @@ options:
     -p, --plot                   Enable PDF plots file generation per mixture.
     -w, --wav                    Generate WAV files per mixture.
     -s, --summary                Enable summary files generation.
+    -n, --num_process NCPU       Number of parallel processes to use [default: auto]
     -e ASR, --asr-method ASR     ASR method: deepgram, google, aixplain_whisper, whisper, or sensory. [default: none]
     -m MODEL, --model            ASR model name used in some ASR methods. [default: tiny]

@@ -154,8 +155,8 @@ def snr(clean_speech, processed_speech, sample_rate):
         signal_energy = np.sum(np.square(clean_frame))
         noise_energy = np.sum(np.square(clean_frame - processed_frame))
         segmental_snr[frame_count] = 10 * np.log10(signal_energy / (noise_energy + eps) + eps)
-        segmental_snr[frame_count] =
-        segmental_snr[frame_count] =
+        segmental_snr[frame_count] = max(segmental_snr[frame_count], min_snr)
+        segmental_snr[frame_count] = min(segmental_snr[frame_count], max_snr)

         start = start + skip_rate

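The two rewritten lines clamp each frame's segmental SNR into `[min_snr, max_snr]`, the standard guard that keeps near-silent frames from producing unbounded values. A self-contained sketch of the same per-frame computation (frame length and limits here are illustrative, not sonusai's defaults):

```python
import numpy as np


def segmental_snr(clean: np.ndarray, processed: np.ndarray, frame_len: int = 512,
                  min_snr: float = -10.0, max_snr: float = 35.0) -> np.ndarray:
    """Per-frame SNR in dB, clamped as in the hunk above."""
    eps = np.finfo(np.float32).eps
    n_frames = len(clean) // frame_len
    out = np.empty(n_frames)
    for i in range(n_frames):
        c = clean[i * frame_len:(i + 1) * frame_len]
        p = processed[i * frame_len:(i + 1) * frame_len]
        snr = 10 * np.log10(np.sum(c ** 2) / (np.sum((c - p) ** 2) + eps) + eps)
        out[i] = np.clip(snr, min_snr, max_snr)  # same effect as the max()/min() pair
    return out
```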
@@ -697,11 +698,14 @@ def plot_e_predict_truth(predict: np.ndarray,


 def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
+    import pickle
     from os.path import basename
     from os.path import join
     from os.path import splitext

     import h5py
+    import mgzip
+    from matplotlib.backends.backend_pdf import PdfPages
     from numpy import inf
     from pystoi import stoi

@@ -718,7 +722,7 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
     from sonusai.utils import reshape_outputs
     from sonusai.utils import stack_complex
     from sonusai.utils import unstack_complex
-    from sonusai.utils import
+    from sonusai.utils import write_audio

     mixdb = MP_GLOBAL.mixdb
     predict_location = MP_GLOBAL.predict_location
@@ -800,8 +804,12 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
         mixture = mixture[0:-trim_t]
         truth_f = truth_f[0:-trim_f, :]
     elif predict.shape[0] > target_f.shape[0]:
-
-        f'
+        logger.debug(
+            f'Warning: prediction has more frames than true mixture {predict.shape[0]} vs {truth_f.shape[0]}')
+        trim_f = predict.shape[0] - target_f.shape[0]
+        predict = predict[0:-trim_f, :]
+        # raise SonusAIError(
+        #     f'Error: prediction has more frames than true mixture {predict.shape[0]} vs {truth_f.shape[0]}')

     # 3) Extraction - format proper complex and wav estimates and truth (unstack, uncompress, inv tf, etc.)
     if truth_est_mode:
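This branch used to raise `SonusAIError` (kept above as a comment); a prediction that runs a few frames long is now logged and trimmed instead of aborting the run. A small numeric illustration of the trim (the shapes are made up):

```python
import numpy as np

predict = np.zeros((1010, 257))   # 10 frames longer than truth; 257 bins is arbitrary
target_f = np.zeros((1000, 257))

trim_f = predict.shape[0] - target_f.shape[0]  # 10
predict = predict[0:-trim_f, :]                # now (1000, 257), aligned with truth
```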
@@ -883,13 +891,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
         wer_tge = float('nan')
         wer_pi = float('nan')
     else:
-        asr_tt = MP_GLOBAL.mixdb.
+        asr_tt = MP_GLOBAL.mixdb.mixture_speech_metadata(mixid, 'text')[0]  # ignore mixup
         if asr_tt is None:
             asr_tt = calc_asr(target, engine=asr_method, whisper_model_name=asr_model_name).text  # target truth
-        # if MP_GLOBAL.mixdb.asr_manifests:
-        #     asr_tt = MP_GLOBAL.mixdb.mixture_asr_data(mixid)[0]  # ignore mixup
-        # else:
-        #     asr_tt = calc_asr(target, engine=asr_method, whisper_model_name=asr_model_name).text  # target truth

         if asr_tt:
             asr_mx = calc_asr(mixture, engine=asr_method, whisper_model_name=asr_model_name).text
@@ -957,10 +961,7 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
             print(f'Noise path: {mixdb.noise_file(ni).name}', file=f)
             if asr_method != 'none':
                 print(f'ASR method: {asr_method} and whisper model (if used): {asr_model_name}', file=f)
-
-                    print(f'ASR truth from metadata: {asr_tt}', file=f)
-                else:
-                    print(f'ASR truth from wer method: {asr_tt}', file=f)
+                print(f'ASR truth: {asr_tt}', file=f)
             print(f'ASR result for mixture: {asr_mx}', file=f)
             print(f'ASR result for prediction: {asr_tge}', file=f)

@@ -968,12 +969,12 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:

     # 7) write wav files
     if enable_wav:
-
-
-        #
-
-
-
+        write_audio(name=base_name + '_mixture.wav', audio=float_to_int16(mixture))
+        write_audio(name=base_name + '_target.wav', audio=float_to_int16(target))
+        # write_audio(name=base_name + '_target_fi.wav', audio=float_to_int16(target_fi))
+        write_audio(name=base_name + '_noise.wav', audio=float_to_int16(noise))
+        write_audio(name=base_name + '_target_est.wav', audio=float_to_int16(target_est_wav))
+        write_audio(name=base_name + '_noise_est.wav', audio=float_to_int16(noise_est_wav))

     # debug code to test for perfect reconstruction of the extraction method
     # note both 75% olsa-hanns and 50% olsa-hann modes checked to have perfect reconstruction
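Each restored call converts float samples through `float_to_int16` before writing. That helper's body is not part of this diff; a plausible equivalent:

```python
import numpy as np


def float_to_int16(audio: np.ndarray) -> np.ndarray:
    # Hypothetical equivalent of sonusai.utils.float_to_int16: clip to the
    # [-1, 1] float range and scale to int16 full scale.
    return (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
```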
@@ -984,7 +985,6 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:

     # 8) Write out plot file
     if enable_plot:
-        from matplotlib.backends.backend_pdf import PdfPages
         plot_name = base_name + '_metric_spenh.pdf'

         # Reshape feature to eliminate overlap redundancy for easier to understand spectrogram view
@@ -1015,12 +1015,15 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
             tfunc_name = tfunc_name + ' (db)'

         mixspec = 20 * np.log10(abs(mixture_f) + np.finfo(np.float32).eps)
-
-
-
-
-
-
+        fig_obj = plot_mixpred(mixture=mixture,
+                               mixture_f=mixspec,
+                               target=target,
+                               feature=feat_sgram,
+                               predict=predplot,
+                               tp_title=tfunc_name)
+        pdf.savefig(fig_obj)
+        with mgzip.open(base_name + '_metric_spenh_fig1.mfigz', 'wb') as f:
+            pickle.dump(fig_obj, f)

         # ----- page 2, plot unmapped predict, opt truth reconstructed and line plots of mean-over-f
         # pdf.savefig(plot_pdb_predtruth(predict=pred_snr_f, tp_title='predict snr_f (db)'))
@@ -1029,22 +1032,28 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
         tg_spec = 20 * np.log10(abs(target_f) + np.finfo(np.float32).eps)
         tg_est_spec = 20 * np.log10(abs(predict_complex) + np.finfo(np.float32).eps)
         # n_spec = np.reshape(n_spec,(n_spec.shape[0] * n_spec.shape[1], n_spec.shape[2]))
-
-
-
-
-
+        fig_obj = plot_e_predict_truth(predict=tg_est_spec,
+                                       predict_wav=target_est_wav,
+                                       truth_f=tg_spec,
+                                       truth_wav=target_fi,
+                                       metric=np.vstack((lerr_tg_frame, phd_frame)).T,
+                                       tp_title='speech estimate')
+        pdf.savefig(fig_obj)
+        with mgzip.open(base_name + '_metric_spenh_fig2.mfigz', 'wb') as f:
+            pickle.dump(fig_obj, f)

         # page 4 noise extraction
         n_spec = 20 * np.log10(abs(noise_f) + np.finfo(np.float32).eps)
         n_est_spec = 20 * np.log10(abs(noise_est_complex) + np.finfo(np.float32).eps)
-
-
-
-
-
-
+        fig_obj = plot_e_predict_truth(predict=n_est_spec,
+                                       predict_wav=noise_est_wav,
+                                       truth_f=n_spec,
+                                       truth_wav=noise_fi,
+                                       metric=lerr_n_frame,
+                                       tp_title='noise estimate')
+        pdf.savefig(fig_obj)
+        with mgzip.open(base_name + '_metric_spenh_fig4.mfigz', 'wb') as f:
+            pickle.dump(fig_obj, f)

         # Plot error waveforms
         # tg_err_wav = target_fi - target_est_wav
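Besides saving each figure into the PDF, the plotting code now pickles the live figure object through `mgzip` (multi-threaded gzip, the new dependency above) into `.mfigz` files so plots can be reopened and re-styled later. Reading one back is symmetric with the writes; the file name below is illustrative:

```python
import pickle

import mgzip

# mgzip.open mirrors gzip.open, so the compressed pickle reads back directly.
with mgzip.open('mix00001_metric_spenh_fig2.mfigz', 'rb') as f:
    fig = pickle.load(f)  # a live matplotlib Figure (matplotlib must be importable)

fig.savefig('fig2_restored.png')  # or rework axes/labels before saving
```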
@@ -1072,6 +1081,7 @@ def main():
     enable_wav = args['--wav']
     enable_summary = args['--summary']
     predict_location = args['PLOC']
+    num_proc = args['--num_process']
     truth_location = args['TLOC']

     import glob
@@ -1080,6 +1090,7 @@ def main():
     from os.path import join
     from os.path import split

+    import psutil
     from tqdm import tqdm

     from sonusai import create_file_handler
@@ -1153,13 +1164,17 @@ def main():
         fnb = 'metric_spenh_fwhsp_' + asr_model_name + '_'
         logger.info(f'ASR enabled with method {asr_method} and whisper model {asr_model_name}')
         enable_asr_warmup = True
+    elif asr_method == 'sensory':
+        fnb = 'metric_spenh_snsr_' + asr_model_name + '_'
+        logger.info(f'ASR enabled with method {asr_method} and model {asr_model_name}')
+        enable_asr_warmup = True
     else:
         logger.error(f'Unrecognized ASR method: {asr_method}')
         return

     if enable_asr_warmup:
-
-        audio = read_audio(
+        default_speech = split(DEFAULT_NOISE)[0] + '/speech_ma01_01.wav'
+        audio = read_audio(default_speech)
         logger.info(f'Warming up asr method, note for cloud service this could take up to a few min ...')
         asr_chk = calc_asr(audio, engine=asr_method, whisper_model_name=asr_model_name)
         logger.info(f'Warmup completed, results {asr_chk}')
@@ -1173,10 +1188,25 @@ def main():
     MP_GLOBAL.asr_method = asr_method
     MP_GLOBAL.asr_model_name = asr_model_name

+    num_cpu = psutil.cpu_count()
+    cpu_percent = psutil.cpu_percent(interval=1)
+    logger.info(f"#CPUs: {num_cpu}, current CPU utilization: {cpu_percent}%")
+    logger.info(f"Memory utilization: {psutil.virtual_memory().percent}%")
+    if num_proc == 'auto':
+        use_cpu = int(num_cpu * (0.9 - cpu_percent / 100))  # default use 80% of available cpus
+    elif num_proc == 'None':
+        use_cpu = None
+    else:
+        use_cpu = min(max(int(num_proc), 1), num_cpu)
+
     # Individual mixtures use pandas print, set precision to 2 decimal places
     # pd.set_option('float_format', '{:.2f}'.format)
+    logger.info(f"Calculating metrics for {len(mixids)} mixtures using {use_cpu} parallel processes ...")
     progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
-
+    if use_cpu is None:
+        all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, no_par=True)
+    else:
+        all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=use_cpu)
     progress.close()

     all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
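The worker count now comes from the new `--num_process` option plus live load sampled with `psutil`. The same selection logic, restated as a standalone function (behavior copied from the hunk above, including the `auto` headroom factor):

```python
from typing import Optional

import psutil


def pick_worker_count(num_proc: str) -> Optional[int]:
    num_cpu = psutil.cpu_count()
    cpu_percent = psutil.cpu_percent(interval=1)  # sample utilization for 1 s
    if num_proc == 'auto':
        # Scale the CPU count down by current utilization, leaving headroom.
        return int(num_cpu * (0.9 - cpu_percent / 100))
    if num_proc == 'None':
        return None  # serial path: pp_tqdm_imap(..., no_par=True)
    return min(max(int(num_proc), 1), num_cpu)
```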
@@ -1209,9 +1239,9 @@ def main():
             for i in range(len(mtab_snr_summary)):
                 if mtab_snr_summary['MXWER'].iloc[i] == 0.0:
                     if mtab_snr_summary['WER'].iloc[i] == 0.0:
-                        mtab_snr_summary['WERi%'].iloc[i] = 0.0
+                        mtab_snr_summary.iloc[i, 6] = 0.0  # mtab_snr_summary['WERi%'].iloc[i] = 0.0
                     else:
-                        mtab_snr_summary['WERi%'].iloc[i] = -999.0
+                        mtab_snr_summary.iloc[i, 6] = -999.0  # mtab_snr_summary['WERi%'].iloc[i] = -999.0
                 else:
                     if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
                         # update WERi% in 6th col
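The motivation for this change: `df['WERi%'].iloc[i] = x` is chained indexing, which can assign into a temporary copy and draws pandas' `SettingWithCopyWarning`, while a single positional `.iloc[row, col]` writes to the frame itself. The diff keeps the hard-coded column position 6; resolving the position by name, as below, is a slightly more defensive variant of the same fix:

```python
import pandas as pd

df = pd.DataFrame({'SNR': [20.0], 'MXWER': [0.0], 'WER': [0.0], 'WERi%': [float('nan')]})

# df['WERi%'].iloc[0] = 0.0                    # chained indexing: may modify a copy
df.iloc[0, df.columns.get_loc('WERi%')] = 0.0  # single-step positional write
```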
@@ -1240,7 +1270,6 @@ def main():
     if num_mix > 1:
         # Print pandas data to files using precision to 2 decimals
         # pd.set_option('float_format', '{:.2f}'.format)
-        csp = 0

         if not truth_est_mode:
             ofname = join(predict_location, fnb + 'summary.txt')
@@ -1280,9 +1309,9 @@ def main():

     # Write summary to .csv file
     if not truth_est_mode:
-        csv_name = join(predict_location, fnb + 'summary.csv')
+        csv_name = str(join(predict_location, fnb + 'summary.csv'))
     else:
-        csv_name = join(predict_location, fnb + '
+        csv_name = str(join(predict_location, fnb + 'truest_summary.csv'))
     header_args = {
         'mode': 'a',
         'encoding': 'utf-8',
@@ -1315,16 +1344,16 @@ def main():
         pd.DataFrame([label]).to_csv(csv_name, **header_args)

     if not truth_est_mode:
-        csv_name = join(predict_location, fnb + 'list.csv')
+        csv_name = str(join(predict_location, fnb + 'list.csv'))
     else:
-        csv_name = join(predict_location, fnb + '
+        csv_name = str(join(predict_location, fnb + 'truest_list.csv'))
     pd.DataFrame(['Speech enhancement metrics list:']).to_csv(csv_name, header=False, index=False)  # open as write
     all_metrics_table_1.round(2).to_csv(csv_name, **table_args)

     if not truth_est_mode:
-        csv_name = join(predict_location, fnb + 'estats_list.csv')
+        csv_name = str(join(predict_location, fnb + 'estats_list.csv'))
     else:
-        csv_name = join(predict_location, fnb + '
+        csv_name = str(join(predict_location, fnb + 'truest_estats_list.csv'))
     pd.DataFrame(['Extraction statistics list:']).to_csv(csv_name, header=False, index=False)  # open as write
     all_metrics_table_2.round(2).to_csv(csv_name, **table_args)

{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/doc/doc.py

@@ -255,30 +255,6 @@ The 'truth_settings' parameter specifies the following:
 """ + get_truth_functions() + default


-def doc_asr_manifest() -> str:
-    default = f"\nDefault value: {get_default_config()['asr_manifest']}"
-    return """
-'asr_manifest' is a mixture database configuration parameter that defines an
-optional ASR manifest.
-
-The parameter takes a list of manifest files to be used to populate ASR data
-per target. Each line of the manifest should be in the following format:
-
-{"audio_filepath": "/path/to/audio.wav", "text": "the transcription of the utterance", "duration": 23.147}
-
-The audio_filepath field should provide an absolute path to the audio file corresponding
-to the utterance. The text field should contain the full transcript for the utterance,
-and the duration field should reflect the duration of the utterance in seconds.
-
-Each entry in the manifest (describing one audio file) should be bordered by '{' and '}'
-and must be contained on one line. The fields that describe the file should be separated
-by commas, and have the form "field_name": value, as shown above.
-
-Since the manifest specifies the path for each utterance, the audio files do not have to be
-located in the same directory as the manifest, or even in any specific directory structure.
-""" + default
-
-
 def doc_augmentations() -> str:
     return """
 Augmentation Rules
sonusai-0.18.0/sonusai/genmetrics.py (new file)

@@ -0,0 +1,146 @@
+# Generate mixdb metrics based on metrics listed in config.yml
+
+
+class MixtureMetrics:
+    @property
+    def mxsnr(self):
+        ...
+
+    @property
+    def mxssnravg(self):
+        ...
+
+    @property
+    def mxssnrstd(self):
+        ...
+
+    @property
+    def mxssnrdavg(self):
+        ...
+
+    @property
+    def mxssnrdstd(self):
+        ...
+
+    @property
+    def mxpesq(self):
+        ...
+
+    @property
+    def mxwsdr(self):
+        ...
+
+    @property
+    def mxpd(self):
+        ...
+
+    @property
+    def mxstoi(self):
+        ...
+
+    @property
+    def mxcsig(self):
+        ...
+
+    @property
+    def mxcbak(self):
+        ...
+
+    @property
+    def mxcovl(self):
+        ...
+
+    def mxwer(self, engine: str, model: str):
+        ...
+
+    @property
+    def tdco(self):
+        ...
+
+    @property
+    def tmin(self):
+        ...
+
+    @property
+    def tmax(self):
+        ...
+
+    @property
+    def tpkdb(self):
+        ...
+
+    @property
+    def tlrms(self):
+        ...
+
+    @property
+    def tpkr(self):
+        ...
+
+    @property
+    def ttr(self):
+        ...
+
+    @property
+    def tcr(self):
+        ...
+
+    @property
+    def tfl(self):
+        ...
+
+    @property
+    def tpkc(self):
+        ...
+
+    @property
+    def ndco(self):
+        ...
+
+    @property
+    def nmin(self):
+        ...
+
+    @property
+    def nmax(self):
+        ...
+
+    @property
+    def npkdb(self):
+        ...
+
+    @property
+    def nlrms(self):
+        ...
+
+    @property
+    def npkr(self):
+        ...
+
+    @property
+    def ntr(self):
+        ...
+
+    @property
+    def ncr(self):
+        ...
+
+    @property
+    def nfl(self):
+        ...
+
+    @property
+    def npkc(self):
+        ...
+
+    @property
+    def sedavg(self):
+        ...
+
+    @property
+    def sedcnt(self):
+        ...
+
+    @property
+    def sedtopn(self):
+        ...
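The new `genmetrics.py` lands as stubs: every metric is declared but left as `...`. One plausible shape for filling these in is lazy per-mixture computation with caching, sketched here with `functools.cached_property` (the energy-ratio body is illustrative, not sonusai's implementation):

```python
from functools import cached_property

import numpy as np


class MixtureMetricsSketch:
    """Sketch only: compute each metric on first access, then cache it."""

    def __init__(self, target: np.ndarray, noise: np.ndarray) -> None:
        self._target = target
        self._noise = noise

    @cached_property
    def mxsnr(self) -> float:
        eps = np.finfo(np.float32).eps
        return float(10 * np.log10(np.sum(self._target ** 2) /
                                   (np.sum(self._noise ** 2) + eps)))
```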
{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/genmixdb.py

@@ -174,7 +174,6 @@ def genmixdb(location: str,
     from sonusai.mixture import initialize_db
     from sonusai.mixture import load_config
     from sonusai.mixture import log_duration_and_sizes
-    from sonusai.mixture import populate_asr_manifest_table
     from sonusai.mixture import populate_class_label_table
     from sonusai.mixture import populate_class_weights_threshold_table
     from sonusai.mixture import populate_impulse_response_file_table
@@ -195,7 +194,6 @@ def genmixdb(location: str,
     mixdb = MixtureDatabase(location=location, test=test)

     populate_top_table(location, config, test)
-    populate_asr_manifest_table(location, config, test)
     populate_class_label_table(location, config, test)
     populate_class_weights_threshold_table(location, config, test)
     populate_spectral_mask_table(location, config, test)
{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/__init__.py

@@ -88,7 +88,6 @@ from .feature import get_feature_from_audio
 from .generation import generate_mixtures
 from .generation import get_all_snrs_from_config
 from .generation import initialize_db
-from .generation import populate_asr_manifest_table
 from .generation import populate_class_label_table
 from .generation import populate_class_weights_threshold_table
 from .generation import populate_impulse_response_file_table
{sonusai-0.17.2 → sonusai-0.18.0}/sonusai/mixture/datatypes.py

@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from dataclasses import field
 from typing import Optional
 from typing import TypeAlias

@@ -135,13 +134,7 @@ class UniversalSNR(float):
         return self._is_random


-
-# class UniversalSNR:
-#     is_random: bool
-#     value: float
-#
-#     def __lt__(self, other) -> bool:
-#         return self.value < other.value
+Speaker: TypeAlias = dict[str, str]


 @dataclass
@@ -151,6 +144,7 @@ class TargetFile(DataClassSonusAIMixin):
     truth_settings: TruthSettings
     class_balancing_augmentation: Optional[AugmentationRule] = None
     level_type: Optional[str] = None
+    speaker_id: Optional[int] = None

     @property
     def duration(self) -> float:
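The removed block was a dead, commented-out draft of `UniversalSNR`; in its place `datatypes.py` gains a `Speaker` alias, and `TargetFile` gains an optional `speaker_id`, which reads as a key into a table of speaker metadata records. A hypothetical illustration of that relationship (the table and field names are ours, not from the diff):

```python
from typing import Optional

Speaker = dict[str, str]  # the new alias: free-form speaker metadata

# Hypothetical table indexed by TargetFile.speaker_id.
speakers: list[Speaker] = [{'name': 'MA01', 'gender': 'M', 'dialect': 'Mandarin'}]


def speaker_field(speaker_id: Optional[int], key: str) -> Optional[str]:
    return None if speaker_id is None else speakers[speaker_id].get(key)
```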
@@ -317,7 +311,6 @@ class FeatureGeneratorInfo:

 @dataclass
 class MixtureDatabaseConfig(DataClassSonusAIMixin):
-    asr_manifest: list[str] = field(default_factory=list)
     class_balancing: Optional[bool] = False
     class_labels: Optional[list[str]] = None
     class_weights_threshold: Optional[list[float]] = None