sonusai 0.15.8__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +35 -4
- sonusai/audiofe.py +237 -0
- sonusai/calc_metric_spenh.py +21 -12
- sonusai/genft.py +2 -1
- sonusai/genmixdb.py +5 -5
- sonusai/lsdb.py +2 -2
- sonusai/main.py +58 -61
- sonusai/mixture/__init__.py +4 -2
- sonusai/mixture/audio.py +0 -34
- sonusai/mixture/config.py +1 -2
- sonusai/mixture/datatypes.py +1 -1
- sonusai/mixture/feature.py +75 -21
- sonusai/mixture/helpers.py +60 -30
- sonusai/mixture/log_duration_and_sizes.py +2 -2
- sonusai/mixture/mixdb.py +13 -10
- sonusai/mixture/spectral_mask.py +14 -14
- sonusai/mixture/truth_functions/data.py +1 -1
- sonusai/mixture/truth_functions/target.py +2 -2
- sonusai/mkmanifest.py +29 -2
- sonusai/onnx_predict.py +1 -1
- sonusai/plot.py +4 -4
- sonusai/post_spenh_targetf.py +8 -8
- sonusai/utils/__init__.py +8 -7
- sonusai/utils/asl_p56.py +3 -3
- sonusai/utils/asr.py +35 -8
- sonusai/utils/asr_functions/__init__.py +0 -5
- sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai/utils/audio_devices.py +41 -0
- sonusai/utils/calculate_input_shape.py +3 -4
- sonusai/utils/create_timestamp.py +5 -0
- sonusai/utils/{trim_docstring.py → docstring.py} +20 -0
- sonusai/utils/model_utils.py +30 -0
- sonusai/utils/onnx_utils.py +19 -45
- sonusai/utils/reshape.py +11 -11
- sonusai/utils/wave.py +12 -5
- {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/METADATA +8 -19
- {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/RECORD +41 -54
- {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/WHEEL +1 -1
- sonusai/data_generator/__init__.py +0 -5
- sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai/evaluate.py +0 -245
- sonusai/keras_onnx.py +0 -86
- sonusai/keras_predict.py +0 -231
- sonusai/keras_train.py +0 -334
- sonusai/torchl_onnx.py +0 -216
- sonusai/torchl_predict.py +0 -547
- sonusai/torchl_train.py +0 -223
- sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai/utils/asr_functions/data.py +0 -16
- sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai/utils/asr_functions/google.py +0 -95
- sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai/utils/keras_utils.py +0 -226
- {sonusai-0.15.8.dist-info → sonusai-0.16.0.dist-info}/entry_points.txt +0 -0
sonusai/__init__.py
CHANGED
@@ -5,6 +5,24 @@ from os.path import dirname
 __version__ = metadata.version(__package__)
 BASEDIR = dirname(__file__)
 
+commands_doc = """
+   audiofe              Audio front end
+   calc_metric_spenh    Run speech enhancement and analysis
+   doc                  Documentation
+   genft                Generate feature and truth data
+   genmix               Generate mixture and truth data
+   genmixdb             Generate a mixture database
+   gentcst              Generate target configuration from a subdirectory tree
+   lsdb                 List information about a mixture database
+   mkmanifest           Make ASR manifest JSON file
+   mkwav                Make WAV files from a mixture database
+   onnx_predict         Run ONNX predict on a trained model
+   plot                 Plot mixture data
+   post_spenh_targetf   Run post-processing for speech enhancement targetf data
+   tplot                Plot truth data
+   vars                 List custom SonusAI variables
+"""
+
 # create logger
 logger = logging.getLogger('sonusai')
 logger.setLevel(logging.DEBUG)
@@ -21,7 +39,7 @@ class SonusAIError(Exception):
 
 
 # create file handler
-def create_file_handler(filename: str):
+def create_file_handler(filename: str) -> None:
     fh = logging.FileHandler(filename=filename, mode='w')
     fh.setLevel(logging.DEBUG)
     fh.setFormatter(formatter)
@@ -29,7 +47,7 @@ def create_file_handler(filename: str):
 
 
 # update console handler
-def update_console_handler(verbose: bool):
+def update_console_handler(verbose: bool) -> None:
     if not verbose:
         logger.removeHandler(console_handler)
         console_handler.setLevel(logging.INFO)
@@ -37,14 +55,17 @@ def update_console_handler(verbose: bool):
 
 
 # write initial log message
-def initial_log_messages(name: str):
+def initial_log_messages(name: str, subprocess: str = None) -> None:
     from datetime import datetime
     from getpass import getuser
     from os import getcwd
     from socket import gethostname
     from sys import argv
 
-    logger.info(f'SonusAI {__version__}')
+    if subprocess is None:
+        logger.info(f'SonusAI {__version__}')
+    else:
+        logger.info(f'SonusAI {subprocess}')
     logger.info(f'{name}')
     logger.info('')
     logger.debug(f'Host: {gethostname()}')
@@ -53,3 +74,13 @@ def initial_log_messages(name: str):
     logger.debug(f'Date: {datetime.now()}')
     logger.debug(f'Command: {" ".join(argv)}')
     logger.debug('')
+
+
+def commands_list(doc: str = commands_doc) -> list[str]:
+    lines = doc.split('\n')
+    commands = []
+    for line in lines:
+        command = line.strip().split(' ').pop(0)
+        if command:
+            commands.append(command)
+    return commands
sonusai/audiofe.py
ADDED
@@ -0,0 +1,237 @@
+"""sonusai audiofe
+
+usage: audiofe [-hvds] [--version] [-i INPUT] [-l LENGTH] [-m MODEL] [-k CKPT] [-a ASR] [-w WMODEL]
+
+options:
+    -h, --help
+    -v, --verbose                Be verbose.
+    -d, --debug                  Write debug data to H5 file.
+    -s, --show                   Show a list of available audio inputs.
+    -i INPUT, --input INPUT      Input audio.
+    -l LENGTH, --length LENGTH   Length of audio in seconds. [default: -1].
+    -m MODEL, --model MODEL      PL model .py file path.
+    -k CKPT, --checkpoint CKPT   PL checkpoint file with weights.
+    -a ASR, --asr ASR            ASR method to use.
+    -w WMODEL, --whisper WMODEL  Whisper model used in aixplain_whisper and whisper methods. [default: tiny].
+
+Aaware SonusAI Audio Front End.
+
+Capture LENGTH seconds of audio from INPUT. If LENGTH is < 0, then capture until key is pressed. If INPUT is a valid
+audio file name, then use the audio data from the specified file. In this case, if LENGTH is < 0, process entire file;
+otherwise, process min(length(INPUT), LENGTH) seconds of audio from INPUT. Audio is saved to
+audiofe_capture_<TIMESTAMP>.wav.
+
+If a model is specified, run prediction on audio data from this model. Then compute the inverse transform of the
+prediction result and save to audiofe_predict_<TIMESTAMP>.wav.
+
+If an ASR is specified, run ASR on the captured audio and print the results. In addition, if a model was also specified,
+run ASR on the predict audio and print the results.
+
+If the debug option is enabled, write capture audio, feature, reconstruct audio, predict, and predict audio to
+audiofe_<TIMESTAMP>.h5.
+
+"""
+from os.path import exists
+from select import select
+from sys import stdin
+
+import h5py
+import numpy as np
+import pyaudio
+import torch
+from docopt import docopt
+from docopt import printable_usage
+
+import sonusai
+from sonusai import create_file_handler
+from sonusai import initial_log_messages
+from sonusai import logger
+from sonusai import update_console_handler
+from sonusai.mixture import AudioT
+from sonusai.mixture import CHANNEL_COUNT
+from sonusai.mixture import SAMPLE_RATE
+from sonusai.mixture import get_audio_from_feature
+from sonusai.mixture import get_feature_from_audio
+from sonusai.mixture import read_audio
+from sonusai.utils import calc_asr
+from sonusai.utils import create_timestamp
+from sonusai.utils import get_input_device_index_by_name
+from sonusai.utils import get_input_devices
+from sonusai.utils import load_torchl_ckpt_model
+from sonusai.utils import trim_docstring
+from sonusai.utils import write_wav
+
+
+def main() -> None:
+    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+    ts = create_timestamp()
+
+    verbose = args['--verbose']
+    length = float(args['--length'])
+    input_name = args['--input']
+    model_name = args['--model']
+    ckpt_name = args['--checkpoint']
+    asr_name = args['--asr']
+    whisper_name = args['--whisper']
+    debug = args['--debug']
+    show = args['--show']
+
+    capture_name = f'audiofe_capture_{ts}.wav'
+    predict_name = f'audiofe_predict_{ts}.wav'
+    h5_name = f'audiofe_{ts}.h5'
+
+    if model_name is not None and ckpt_name is None:
+        print(printable_usage(trim_docstring(__doc__)))
+        exit(1)
+
+    # Setup logging file
+    create_file_handler('audiofe.log')
+    update_console_handler(verbose)
+    initial_log_messages('audiofe')
+
+    if show:
+        logger.info('List of available audio inputs:')
+        logger.info('')
+        p = pyaudio.PyAudio()
+        for name in get_input_devices(p):
+            logger.info(f'{name}')
+        logger.info('')
+        p.terminate()
+        return
+
+    if input_name is not None and exists(input_name):
+        capture_audio = get_frames_from_file(input_name, length)
+    else:
+        try:
+            capture_audio = get_frames_from_device(input_name, length)
+        except ValueError as e:
+            logger.exception(e)
+            return
+
+    write_wav(capture_name, capture_audio, SAMPLE_RATE)
+    logger.info('')
+    logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {capture_name}')
+    if debug:
+        with h5py.File(h5_name, 'a') as f:
+            if 'capture_audio' in f:
+                del f['capture_audio']
+            f.create_dataset('capture_audio', data=capture_audio)
+        logger.info(f'Wrote capture audio with shape {capture_audio.shape} to {h5_name}')
+
+    if asr_name is not None:
+        capture_asr = calc_asr(capture_audio, engine=asr_name, whisper_model_name=whisper_name).text
+        logger.info(f'Capture audio ASR: {capture_asr}')
+
+    if model_name is not None:
+        model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
+        model.eval()
+
+        feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'feature' in f:
+                    del f['feature']
+                f.create_dataset('feature', data=feature)
+            logger.info(f'Wrote feature with shape {feature.shape} to {h5_name}')
+
+        # if debug:
+        #     reconstruct_name = f'audiofe_reconstruct_{ts}.wav'
+        #     reconstruct_audio = get_audio_from_feature(feature=feature, feature_mode=model.hparams.feature)
+        #     samples = min(len(capture_audio), len(reconstruct_audio))
+        #     max_err = np.max(np.abs(capture_audio[:samples] - reconstruct_audio[:samples]))
+        #     logger.info(f'Maximum error between capture and reconstruct: {max_err}')
+        #     write_wav(reconstruct_name, reconstruct_audio, SAMPLE_RATE)
+        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {reconstruct_name}')
+        #     with h5py.File(h5_name, 'a') as f:
+        #         if 'reconstruct_audio' in f:
+        #             del f['reconstruct_audio']
+        #         f.create_dataset('reconstruct_audio', data=reconstruct_audio)
+        #     logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
+
+        with torch.no_grad():
+            # model wants batch x timesteps x feature_parameters
+            predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'predict' in f:
+                    del f['predict']
+                f.create_dataset('predict', data=predict)
+            logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
+
+        predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
+        write_wav(predict_name, predict_audio, SAMPLE_RATE)
+        logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
+        if debug:
+            with h5py.File(h5_name, 'a') as f:
+                if 'predict_audio' in f:
+                    del f['predict_audio']
+                f.create_dataset('predict_audio', data=predict_audio)
+            logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {h5_name}')
+
+        if asr_name is not None:
+            predict_asr = calc_asr(predict_audio, engine=asr_name, whisper_model_name=whisper_name).text
+            logger.info(f'Predict audio ASR: {predict_asr}')
+
+
+def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
+    p = pyaudio.PyAudio()
+
+    input_devices = get_input_devices(p)
+    if not input_devices:
+        raise ValueError('No input audio devices found')
+
+    if input_name is None:
+        input_name = input_devices[0]
+
+    try:
+        device_index = get_input_device_index_by_name(p, input_name)
+    except ValueError:
+        msg = f'Could not find {input_name}\n'
+        msg += f'Available devices:\n'
+        for input_device in input_devices:
+            msg += f'  {input_device}\n'
+        raise ValueError(msg)
+
+    logger.info(f'Capturing from {p.get_device_info_by_index(device_index).get("name")}')
+    stream = p.open(format=pyaudio.paFloat32,
+                    channels=CHANNEL_COUNT,
+                    rate=SAMPLE_RATE,
+                    input=True,
+                    input_device_index=device_index)
+    stream.start_stream()
+
+    print()
+    print('+---------------------------------+')
+    print('| Press Enter to stop             |')
+    print('+---------------------------------+')
+    print()
+
+    elapsed = 0.0
+    seconds_per_chunk = float(chunk) / float(SAMPLE_RATE)
+    raw_frames = []
+    while elapsed < length or length == -1:
+        raw_frames.append(stream.read(num_frames=chunk, exception_on_overflow=False))
+        elapsed += seconds_per_chunk
+        if select([stdin, ], [], [], 0)[0]:
+            stdin.read(1)
+            length = elapsed
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+    frames = np.frombuffer(b''.join(raw_frames), dtype=np.float32)
+    return frames
+
+
+def get_frames_from_file(input_name: str, length: float) -> AudioT:
+    logger.info(f'Capturing from {input_name}')
+    frames = read_audio(input_name)
+    if length != -1:
+        num_frames = int(length * SAMPLE_RATE)
+        if len(frames) > num_frames:
+            frames = frames[:num_frames]
+    return frames
+
+
+if __name__ == '__main__':
+    main()
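
The core of audiofe's predict path is the feature/audio transform round trip seen above. A standalone sketch of that round trip, assuming sonusai 0.16.0 is installed and mirroring the commented-out debug check in the file; 'gfh64to128' is a placeholder for whatever feature mode the model was trained with:

    import numpy as np

    from sonusai.mixture import SAMPLE_RATE
    from sonusai.mixture import get_audio_from_feature
    from sonusai.mixture import get_feature_from_audio

    # Two seconds of noise as stand-in capture audio.
    audio = np.random.default_rng(0).normal(0, 0.1, 2 * SAMPLE_RATE).astype(np.float32)

    feature = get_feature_from_audio(audio=audio, feature_mode='gfh64to128')
    reconstruct = get_audio_from_feature(feature=feature, feature_mode='gfh64to128')

    # The inverse transform trims startup samples, so compare only the overlapping region.
    samples = min(len(audio), len(reconstruct))
    max_err = np.max(np.abs(audio[:samples] - reconstruct[:samples]))
    print(f'feature shape: {feature.shape}, max reconstruction error: {max_err}')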
sonusai/calc_metric_spenh.py
CHANGED
@@ -758,13 +758,18 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
     predict = stack_complex(predict)
 
     # 2) Collect true target, noise, mixture data, trim to predict size if needed
-
-    target_f = mixdb.
-
-
-
+    tmp = mixdb.mixture_targets(mixid)  # targets is list of pre-IR and pre-specaugment targets
+    target_f = mixdb.mixture_targets_f(mixid, targets=tmp)[0]
+    target = tmp[0]
+    mixture = mixdb.mixture_mixture(mixid)  # note: gives full reverberated/distorted target, but no specaugment
+    # noise_wodist = mixdb.mixture_noise(mixid)  # noise without specaugment and distortion
+    # noise_wodist_f = mixdb.mixture_noise_f(mixid, noise=noise_wodist)
+    noise = mixture - target  # has time-domain distortion (ir, etc.) but does not have specaugment
+    # noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
+    segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)  # note: uses pre-IR, pre-specaug audio
     mixture_f = mixdb.mixture_mixture_f(mixid, mixture=mixture)
-
+    noise_f = mixture_f - target_f  # true noise in freq domain includes specaugment and time-domain ir, distortions
+    # segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
     segsnr_f[segsnr_f == inf] = 7.944e8  # 99db
     segsnr_f[segsnr_f == -inf] = 1.258e-10  # -99db
     # need to use inv-tf to match #samples & latency shift properties of predict inv tf
@@ -920,8 +925,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
                           'NLERR': lerr_n_frame,
                           'SPD': phd_frame})
     metr2 = metr2.describe()  # Use pandas stat function
-
-
+    # Change SSNR stats to dB, except count. SSNR is index 0, pandas requires using iloc
+    # metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
+    metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
     # create a single row in multi-column header
     new_labels = pd.MultiIndex.from_product([metr2.columns,
                                              ['Avg', 'Min', 'Med', 'Max', 'Std']],
@@ -1166,7 +1172,7 @@ def main():
     # Individual mixtures use pandas print, set precision to 2 decimal places
     # pd.set_option('float_format', '{:.2f}'.format)
     progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
-    all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=
+    all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=8)
     progress.close()
 
     all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
@@ -1192,6 +1198,7 @@ def main():
         if ~np.isnan(tmp.iloc[0].to_numpy()[0]).any():
             mtab_snr_summary_em = pd.concat([mtab_snr_summary_em, tmp])
 
+    mtab_snr_summary = mtab_snr_summary.sort_values(by=['MXSNR'], ascending=False)
     # Correct percentages in snr summary table
     mtab_snr_summary['PESQi%'] = 100 * (mtab_snr_summary['PESQ'] - mtab_snr_summary['MXPESQ']) / np.maximum(
         mtab_snr_summary['MXPESQ'], 0.01)
@@ -1202,9 +1209,11 @@ def main():
             else:
                 mtab_snr_summary['WERi%'].iloc[i] = -999.0
         else:
-            mtab_snr_summary['
-
-
+            if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
+                # update WERi% in 6th col
+                mtab_snr_summary.iloc[i, 6] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
+                                                     mtab_snr_summary['WER'].iloc[i]) / \
+                                              mtab_snr_summary['MXWER'].iloc[i]
 
     # Calculate avg metrics over all mixtures except -99
     all_mtab1_sorted_nom99 = all_mtab1_sorted[all_mtab1_sorted.MXSNR != -99]
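
The metr2.iloc[1:, 0] change works around pandas chained assignment: writing through metr2['SSNR'][1:] = ... (the commented-out form) may operate on a copy and leave the frame untouched. A small self-contained illustration of the pattern, with made-up values:

    import numpy as np
    import pandas as pd

    # describe() puts 'count' in row 0 and the real stats in rows 1 and up.
    metr2 = pd.DataFrame({'SSNR': [1.0, 10.0, 100.0], 'PESQ': [1.2, 2.3, 3.4]}).describe()

    # Convert the SSNR stats (column 0) to dB while leaving 'count' linear;
    # iloc writes into the frame itself instead of a possible copy.
    metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
    print(metr2)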
sonusai/genft.py
CHANGED
@@ -165,7 +165,8 @@ def main() -> None:
     logger.info(f'Wrote {len(mixids)} mixtures to {location}')
     logger.info('')
     logger.info(f'Duration: {seconds_to_hms(seconds=duration)}')
-    logger.info(
+    logger.info(
+        f'feature: {human_readable_size(total_feature_frames * mixdb.fg_stride * mixdb.feature_parameters * 4, 1)}')
     logger.info(f'truth_f: {human_readable_size(total_feature_frames * mixdb.num_classes * 4, 1)}')
     if compute_segsnr:
         logger.info(f'segsnr: {human_readable_size(total_transform_frames * 4, 1)}')
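
The restored feature log line estimates on-disk size as frames x stride x feature parameters x 4 bytes of float32. A worked example of the arithmetic, with hypothetical values standing in for what the mixture database would report:

    # Hypothetical values; the real ones come from the mixture database.
    total_feature_frames = 100_000
    fg_stride = 6
    feature_parameters = 140

    feature_bytes = total_feature_frames * fg_stride * feature_parameters * 4  # float32
    print(f'feature: {feature_bytes / 2**30:.2f} GB')  # 100000 * 6 * 140 * 4 B ~= 0.31 GB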
sonusai/genmixdb.py
CHANGED
@@ -225,7 +225,7 @@ def genmixdb(location: str,
     if logging:
         logger.info('Collecting impulse responses')
 
-    impulse_response_files = get_impulse_response_files(config
+    impulse_response_files = get_impulse_response_files(config)
 
     populate_impulse_response_file_table(location, impulse_response_files, test)
 
@@ -337,12 +337,12 @@ def genmixdb(location: str,
         log_duration_and_sizes(total_duration=total_duration,
                                num_classes=mixdb.num_classes,
                                feature_step_samples=mixdb.feature_step_samples,
-
+                               feature_parameters=mixdb.feature_parameters,
                                stride=mixdb.fg_stride,
                                desc='Estimated')
         logger.info(f'Feature shape: '
-                    f'{mixdb.fg_stride} x {mixdb.
-                    f'({mixdb.fg_stride * mixdb.
+                    f'{mixdb.fg_stride} x {mixdb.feature_parameters} '
+                    f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
         logger.info(f'Feature samples: {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
         logger.info(f'Feature step samples: {mixdb.feature_step_samples} samples ({mixdb.feature_step_ms} ms)')
         logger.info('')
@@ -371,7 +371,7 @@ def genmixdb(location: str,
         log_duration_and_sizes(total_duration=total_duration,
                                num_classes=mixdb.num_classes,
                                feature_step_samples=mixdb.feature_step_samples,
-
+                               feature_parameters=mixdb.feature_parameters,
                                stride=mixdb.fg_stride,
                                desc='Actual')
         logger.info('')
sonusai/lsdb.py
CHANGED
@@ -48,8 +48,8 @@ def lsdb(mixdb: MixtureDatabase,
     logger.info(f'{"Targets":{desc_len}} {mixdb.num_target_files}')
     logger.info(f'{"Noises":{desc_len}} {mixdb.num_noise_files}')
     logger.info(f'{"Feature":{desc_len}} {mixdb.feature}')
-    logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.
-                f'({mixdb.fg_stride * mixdb.
+    logger.info(f'{"Feature shape":{desc_len}} {mixdb.fg_stride} x {mixdb.feature_parameters} '
+                f'({mixdb.fg_stride * mixdb.feature_parameters} total params)')
     logger.info(f'{"Feature samples":{desc_len}} {mixdb.feature_samples} samples ({mixdb.feature_ms} ms)')
     logger.info(f'{"Feature step samples":{desc_len}} {mixdb.feature_step_samples} samples '
                 f'({mixdb.feature_step_ms} ms)')
sonusai/main.py
CHANGED
@@ -3,91 +3,88 @@
 usage: sonusai [--version] [--help] <command> [<args>...]
 
 The sonusai commands are:
-   calc_metric_spenh    Run speech enhancement and analysis
-   doc                  Documentation
-   evaluate             Evaluate model performance
-   genft                Generate feature and truth data
-   genmix               Generate mixture and truth data
-   genmixdb             Generate a mixture database
-   gentcst              Generate target configuration from a subdirectory tree
-   keras_onnx           Convert a trained Keras model to ONNX
-   keras_predict        Run Keras predict on a trained model
-   keras_train          Train a model using Keras
-   lsdb                 List information about a mixture database
-   mkmanifest           Make ASR manifest JSON file
-   mkwav                Make WAV files from a mixture database
-   onnx_predict         Run ONNX predict on a trained model
-   plot                 Plot mixture data
-   post_spenh_targetf   Run post-processing for speech enhancement targetf data
-   torchl_onnx          Convert a trained Pytorch Lightning model to ONNX
-   torchl_predict       Run Lightning predict on a trained model
-   torchl_train         Train a model using Lightning
-   tplot                Plot truth data
-   vars                 List custom SonusAI variables
+   <This information is automatically generated.>
 
 Aaware Sound and Voice Machine Learning Framework. See 'sonusai help <command>'
 for more information on a specific command.
 
 """
-
+import signal
+
+
+def signal_handler(_sig, _frame):
+    import sys
+
+    from sonusai import logger
+
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+
+
+signal.signal(signal.SIGINT, signal_handler)
 
 
 def main() -> None:
+    from importlib import import_module
+    from pkgutil import iter_modules
+
+    from sonusai import commands_list
+
+    plugins = {}
+    plugin_docstrings = []
+    for _, name, _ in iter_modules():
+        if name.startswith('sonusai_') and not name.startswith('sonusai_asr_'):
+            module = import_module(name)
+            plugins[name] = {
+                'commands': commands_list(module.commands_doc),
+                'basedir': module.BASEDIR,
+            }
+            plugin_docstrings.append(module.commands_doc)
+
     from docopt import docopt
 
-    import sonusai
+    from sonusai import __version__
+    from sonusai.utils import add_commands_to_docstring
     from sonusai.utils import trim_docstring
 
-    commands = (
-        'calc_metric_spenh',
-        'doc',
-        'evaluate',
-        'genft',
-        'genmix',
-        'genmixdb',
-        'gentcst',
-        'keras_onnx',
-        'keras_predict',
-        'keras_train',
-        'lsdb',
-        'mkmanifest',
-        'mkwav',
-        'onnx_predict',
-        'plot',
-        'post_spenh_targetf',
-        'torchl_onnx',
-        'torchl_predict',
-        'torchl_train',
-        'tplot',
-        'vars',
-    )
-
-    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+    args = docopt(trim_docstring(add_commands_to_docstring(__doc__, plugin_docstrings)),
+                  version=__version__,
+                  options_first=True)
 
     command = args['<command>']
     argv = args['<args>']
 
+    import sys
+    from os.path import join
     from subprocess import call
 
     import sonusai
-    from sonusai import
+    from sonusai import logger
 
+    base_commands = sonusai.commands_list()
     if command == 'help':
        if not argv:
            exit(call(['sonusai', '-h']))
-        elif argv[0] in
-            exit(call(['python', f'{sonusai.BASEDIR
-
-
-
-
+        elif argv[0] in base_commands:
+            exit(call(['python', f'{join(sonusai.BASEDIR, argv[0])}.py', '-h']))
+
+        for plugin, data in plugins.items():
+            if argv[0] in data['commands']:
+                exit(call(['python', f'{join(data["basedir"], argv[0])}.py', '-h']))
+
+        logger.error(f"{argv[0]} is not a SonusAI command. See 'sonusai help'.")
+        sys.exit(1)
+
+    if command in base_commands:
+        exit(call(['python', f'{join(sonusai.BASEDIR, command)}.py'] + argv))
+
+    for plugin, data in plugins.items():
+        if command in data['commands']:
+            exit(call(['python', f'{join(data["basedir"], command)}.py'] + argv))
 
-
+    logger.error(f"{command} is not a SonusAI command. See 'sonusai help'.")
+    sys.exit(1)
 
 
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()
sonusai/mixture/__init__.py
CHANGED
@@ -1,6 +1,4 @@
 # SonusAI mixture utilities
-from .audio import calculate_audio_from_transform
-from .audio import calculate_transform_from_audio
 from .audio import get_duration
 from .audio import get_next_noise
 from .audio import get_num_samples
@@ -83,6 +81,7 @@ from .datatypes import TruthFunctionConfig
 from .datatypes import TruthSetting
 from .datatypes import TruthSettings
 from .datatypes import UniversalSNR
+from .feature import get_audio_from_feature
 from .feature import get_feature_from_audio
 from .generation import generate_mixtures
 from .generation import get_all_snrs_from_config
@@ -102,11 +101,14 @@ from .helpers import augmented_noise_samples
 from .helpers import augmented_target_samples
 from .helpers import check_audio_files_exist
 from .helpers import forward_transform
+from .helpers import get_audio_from_transform
 from .helpers import get_ft
 from .helpers import get_segsnr
+from .helpers import get_transform_from_audio
 from .helpers import get_truth_t
 from .helpers import inverse_transform
 from .helpers import mixture_metadata
+from .helpers import read_mixture_data
 from .helpers import write_mixture_data
 from .helpers import write_mixture_metadata
 from .log_duration_and_sizes import log_duration_and_sizes
sonusai/mixture/audio.py
CHANGED
@@ -1,11 +1,6 @@
 from functools import lru_cache
 
-from pyaaware import ForwardTransform
-from pyaaware import InverseTransform
-
-from sonusai.mixture.datatypes import AudioF
 from sonusai.mixture.datatypes import AudioT
-from sonusai.mixture.datatypes import EnergyT
 from sonusai.mixture.datatypes import ImpulseResponseData
 
 
@@ -22,35 +17,6 @@ def get_next_noise(audio: AudioT, offset: int, length: int) -> AudioT:
     return np.take(audio, range(offset, offset + length), mode='wrap')
 
 
-def calculate_transform_from_audio(audio: AudioT,
-                                   transform: ForwardTransform) -> tuple[AudioF, EnergyT]:
-    """Apply forward transform to input audio data to generate transform data
-
-    :param audio: Time domain data [samples]
-    :param transform: ForwardTransform object
-    :return: Frequency domain data [frames, bins], Energy [frames]
-    """
-    f, e = transform.execute_all(audio)
-    return f.transpose(), e
-
-
-def calculate_audio_from_transform(data: AudioF,
-                                   transform: InverseTransform,
-                                   trim: bool = True) -> tuple[AudioT, EnergyT]:
-    """Apply inverse transform to input transform data to generate audio data
-
-    :param data: Frequency domain data [frames, bins]
-    :param transform: InverseTransform object
-    :param trim: Removes starting samples so output waveform will be time-aligned with input waveform to the transform
-    :return: Time domain data [samples], Energy [frames]
-    """
-    t, e = transform.execute_all(data.transpose())
-    if trim:
-        t = t[transform.N - transform.R:]
-
-    return t, e
-
-
 def get_duration(audio: AudioT) -> float:
     """Get duration of audio in seconds
 