PyPI - sonusai - Versions diffs - 0.15.9__py3-none-any.whl → 0.16.1__py3-none-any.whl - Mend

sonusai 0.15.9__py3-none-any.whl → 0.16.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

sonusai/__init__.py +36 -4
sonusai/audiofe.py +111 -106
sonusai/calc_metric_spenh.py +38 -22
sonusai/genft.py +15 -6
sonusai/genmix.py +14 -6
sonusai/genmixdb.py +15 -7
sonusai/gentcst.py +13 -6
sonusai/lsdb.py +15 -5
sonusai/main.py +58 -61
sonusai/mixture/__init__.py +1 -0
sonusai/mixture/config.py +1 -2
sonusai/mkmanifest.py +43 -8
sonusai/mkwav.py +15 -6
sonusai/onnx_predict.py +16 -6
sonusai/plot.py +16 -6
sonusai/post_spenh_targetf.py +13 -6
sonusai/summarize_metric_spenh.py +71 -0
sonusai/tplot.py +14 -6
sonusai/utils/__init__.py +4 -7
sonusai/utils/asl_p56.py +3 -3
sonusai/utils/asr.py +35 -8
sonusai/utils/asr_functions/__init__.py +0 -5
sonusai/utils/asr_functions/aaware_whisper.py +2 -2
sonusai/utils/asr_manifest_functions/__init__.py +1 -0
sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
sonusai/utils/{trim_docstring.py → docstring.py} +20 -0
sonusai/utils/model_utils.py +30 -0
sonusai/utils/onnx_utils.py +19 -45
{sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/METADATA +7 -25
{sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/RECORD +32 -46
sonusai/data_generator/__init__.py +0 -5
sonusai/data_generator/dataset_from_mixdb.py +0 -143
sonusai/data_generator/keras_from_mixdb.py +0 -169
sonusai/data_generator/torch_from_mixdb.py +0 -122
sonusai/keras_onnx.py +0 -86
sonusai/keras_predict.py +0 -231
sonusai/keras_train.py +0 -334
sonusai/torchl_onnx.py +0 -216
sonusai/torchl_predict.py +0 -542
sonusai/torchl_train.py +0 -223
sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
sonusai/utils/asr_functions/data.py +0 -16
sonusai/utils/asr_functions/deepgram.py +0 -97
sonusai/utils/asr_functions/fastwhisper.py +0 -90
sonusai/utils/asr_functions/google.py +0 -95
sonusai/utils/asr_functions/whisper.py +0 -49
sonusai/utils/keras_utils.py +0 -226
{sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/WHEEL +0 -0
{sonusai-0.15.9.dist-info → sonusai-0.16.1.dist-info}/entry_points.txt +0 -0

sonusai/lsdb.py CHANGED Viewed

@@ -15,11 +15,25 @@ Inputs:
     LOC     A SonusAI mixture database directory.
 """
+import signal
 from sonusai import logger
 from sonusai.mixture import GeneralizedIDs
 from sonusai.mixture import MixtureDatabase
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 def lsdb(mixdb: MixtureDatabase,
          mixids: GeneralizedIDs = None,
          truth_index: int = None,
@@ -142,8 +156,4 @@ def main() -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/main.py CHANGED Viewed

@@ -3,91 +3,88 @@
 usage: sonusai [--version] [--help] <command> [<args>...]
 The sonusai commands are:
-   audiofe                      Audio front end
-   calc_metric_spenh            Run speech enhancement and analysis
-   doc                          Documentation
-   genft                        Generate feature and truth data
-   genmix                       Generate mixture and truth data
-   genmixdb                     Generate a mixture database
-   gentcst                      Generate target configuration from a subdirectory tree
-   keras_onnx                   Convert a trained Keras model to ONNX
-   keras_predict                Run Keras predict on a trained model
-   keras_train                  Train a model using Keras
-   lsdb                         List information about a mixture database
-   mkmanifest                   Make ASR manifest JSON file
-   mkwav                        Make WAV files from a mixture database
-   onnx_predict                 Run ONNX predict on a trained model
-   plot                         Plot mixture data
-   post_spenh_targetf           Run post-processing for speech enhancement targetf data
-   torchl_onnx                  Convert a trained Pytorch Lightning model to ONNX
-   torchl_predict               Run Lightning predict on a trained model
-   torchl_train                 Train a model using Lightning
-   tplot                        Plot truth data
-   vars                         List custom SonusAI variables
+    <This information is automatically generated.>
 Aaware Sound and Voice Machine Learning Framework. See 'sonusai help <command>'
 for more information on a specific command.
 """
-from sonusai import logger
+import signal
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 def main() -> None:
+    from importlib import import_module
+    from pkgutil import iter_modules
+    from sonusai import commands_list
+    plugins = {}
+    plugin_docstrings = []
+    for _, name, _ in iter_modules():
+        if name.startswith('sonusai_') and not name.startswith('sonusai_asr_'):
+            module = import_module(name)
+            plugins[name] = {
+                'commands': commands_list(module.commands_doc),
+                'basedir':  module.BASEDIR,
+            }
+            plugin_docstrings.append(module.commands_doc)
     from docopt import docopt
-    import sonusai
+    from sonusai import __version__
+    from sonusai.utils import add_commands_to_docstring
     from sonusai.utils import trim_docstring
-    commands = (
-        'audiofe',
-        'calc_metric_spenh',
-        'doc',
-        'genft',
-        'genmix',
-        'genmixdb',
-        'gentcst',
-        'keras_onnx',
-        'keras_predict',
-        'keras_train',
-        'lsdb',
-        'mkmanifest',
-        'mkwav',
-        'onnx_predict',
-        'plot',
-        'post_spenh_targetf',
-        'torchl_onnx',
-        'torchl_predict',
-        'torchl_train',
-        'tplot',
-        'vars',
-    )
-    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+    args = docopt(trim_docstring(add_commands_to_docstring(__doc__, plugin_docstrings)),
+                  version=__version__,
+                  options_first=True)
     command = args['<command>']
     argv = args['<args>']
+    import sys
+    from os.path import join
     from subprocess import call
     import sonusai
-    from sonusai import SonusAIError
+    from sonusai import logger
+    base_commands = sonusai.commands_list()
     if command == 'help':
         if not argv:
             exit(call(['sonusai', '-h']))
-        elif argv[0] in commands:
-            exit(call(['python', f'{sonusai.BASEDIR}/{argv[0]}.py', '-h']))
-        else:
-            raise SonusAIError(f"{argv[0]} is not a SonusAI command. See 'sonusai help'.")
-    elif command in commands:
-        exit(call(['python', f'{sonusai.BASEDIR}/{command}.py'] + argv))
+        elif argv[0] in base_commands:
+            exit(call(['python', f'{join(sonusai.BASEDIR, argv[0])}.py', '-h']))
+        for plugin, data in plugins.items():
+            if argv[0] in data['commands']:
+                exit(call(['python', f'{join(data["basedir"], argv[0])}.py', '-h']))
+        logger.error(f"{argv[0]} is not a SonusAI command. See 'sonusai help'.")
+        sys.exit(1)
+    if command in base_commands:
+        exit(call(['python', f'{join(sonusai.BASEDIR, command)}.py'] + argv))
+    for plugin, data in plugins.items():
+        if command in data['commands']:
+            exit(call(['python', f'{join(data["basedir"], command)}.py'] + argv))
-    raise SonusAIError(f"{command} is not a SonusAI command. See 'sonusai help'.")
+    logger.error(f"{command} is not a SonusAI command. See 'sonusai help'.")
+    sys.exit(1)
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/mixture/__init__.py CHANGED Viewed

@@ -108,6 +108,7 @@ from .helpers import get_transform_from_audio
 from .helpers import get_truth_t
 from .helpers import inverse_transform
 from .helpers import mixture_metadata
+from .helpers import read_mixture_data
 from .helpers import write_mixture_data
 from .helpers import write_mixture_metadata
 from .log_duration_and_sizes import log_duration_and_sizes

sonusai/mixture/config.py CHANGED Viewed

@@ -480,11 +480,10 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
     return noise_files
-def get_impulse_response_files(config: dict, show_progress: bool = False) -> ImpulseResponseFiles:
+def get_impulse_response_files(config: dict) -> ImpulseResponseFiles:
     """Get the list of impulse response files from a config
     :param config: Config dictionary
-    :param show_progress: Show progress bar
     :return: List of impulse response files
     """
     from itertools import chain

sonusai/mkmanifest.py CHANGED Viewed

@@ -30,6 +30,8 @@ Inputs:
                     - 'librispeech'
                     - 'vctk_noisy_speech' expects subdirs named like <name>_wav/ and <name>_txt/ with files in
                       each using same basename, but with .wav and .txt respectively.
+                    - 'mcgill-speech' expects audio data in basename/speakerid/speakerid-promptid.wav and
+                      transcript data in Scripts/HarvardLists.dat
     ADAT        Audio data environment variable. All found files will be expanded to their full, absolute path and
                 then parts of the path that match the specified environment variable value will be replaced with
                 the variable. This accommodates portability across platforms where the sound datasets may in
@@ -42,11 +44,23 @@ Outputs the following to the current directory:
 Example usage for LibriSpeech:
   sonusai mkmanifest -mlibrispeech -eADAT -oasr_manifest.json --include='*.flac' train-clean-100
+  sonusai mkmanifest -m mcgill-speech -e ADAT -o asr_manifest_16k.json 16k-LP7/
 """
-from sonusai import logger
+import signal
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
-VALID_METHOD = ['librispeech', 'vctk_noisy_speech']
+VALID_METHOD = ['librispeech', 'vctk_noisy_speech', 'mcgill-speech']
 def main() -> None:
@@ -88,6 +102,7 @@ def main() -> None:
     from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
     from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry
     from sonusai.utils.asr_manifest_functions import get_vctk_noisy_speech_manifest_entry
+    from sonusai.utils.asr_manifest_functions import get_mcgill_speech_manifest_entry
     start_time = time.monotonic()
@@ -160,6 +175,30 @@ def main() -> None:
             for result in results:
                 f.write(json.dumps(result) + '\n')
+    if method == 'mcgill-speech':
+        logger.info(f'Found {len(entries)} Mcgill Speech files, opening prompt file ...')
+        # Note expecting only one path pointing to data subdir
+        if len(paths) != 1:
+            raise SonusAIError(f'mcgill-speech only support a single path')
+        prompt_fpath = join(join(realpath(abspath(paths[0]))), '../Scripts/HarvardList.dat')
+        with open(prompt_fpath, encoding='utf-8') as f:
+            lines = f.readlines()
+        logger.info(f'Found {len(lines) - 4} entries in prompt file.')
+        # First 4 lines are header stuff, can use remaining directly with simple lookup
+        # example line: '01_02:Glue the sheet ...\n' (paragraph 1, sentence 2)
+        # 11 entries per group, so getting line is 11*(p1-1)+(s2-1)
+        lines = lines[4:]
+        processing_func = partial(get_mcgill_speech_manifest_entry, transcript_data=lines)
+        progress = tqdm(total=len(entries), desc='Creating Mcgill Speech manifest data')
+        results = pp_tqdm_imap(processing_func, entries, progress=progress)
+        progress.close()
+        with open(output, 'w') as f:
+            for result in results:
+                f.write(json.dumps(result) + '\n')
     end_time = time.monotonic()
     logger.info('')
     logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
@@ -167,8 +206,4 @@ def main() -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/mkwav.py CHANGED Viewed

@@ -23,13 +23,25 @@ Outputs the following to the mixture database directory:
     mkwav.log
 """
+import signal
 from dataclasses import dataclass
-from sonusai import logger
 from sonusai.mixture import AudioT
 from sonusai.mixture import MixtureDatabase
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 @dataclass
 class MPGlobal:
     mixdb: MixtureDatabase = None
@@ -120,6 +132,7 @@ def main() -> None:
     import sonusai
     from sonusai import create_file_handler
     from sonusai import initial_log_messages
+    from sonusai import logger
     from sonusai import update_console_handler
     from sonusai.mixture import check_audio_files_exist
     from sonusai.utils import pp_tqdm_imap
@@ -164,8 +177,4 @@ def main() -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/onnx_predict.py CHANGED Viewed

@@ -29,12 +29,25 @@ Outputs the following to opredict-<TIMESTAMP> directory:
 """
-from sonusai import logger
+import signal
 from sonusai.mixture import Feature
 from sonusai.mixture import Predict
 from sonusai.utils import SonusAIMetaData
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 def main() -> None:
     from docopt import docopt
@@ -60,6 +73,7 @@ def main() -> None:
     from sonusai import create_file_handler
     from sonusai import initial_log_messages
+    from sonusai import logger
     from sonusai import update_console_handler
     from sonusai.mixture import MixtureDatabase
     from sonusai.mixture import get_feature_from_audio
@@ -233,8 +247,4 @@ def pad_and_predict(feature: Feature,
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/plot.py CHANGED Viewed

@@ -41,16 +41,29 @@ Outputs:
 """
+import signal
 import numpy as np
 from matplotlib import pyplot as plt
-from sonusai import logger
 from sonusai.mixture import AudioT
 from sonusai.mixture import Feature
 from sonusai.mixture import Predict
 from sonusai.mixture import Truth
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 def spec_plot(mixture: AudioT,
               feature: Feature,
               predict: Predict = None,
@@ -264,6 +277,7 @@ def main() -> None:
     from sonusai import SonusAIError
     from sonusai import create_file_handler
     from sonusai import initial_log_messages
+    from sonusai import logger
     from sonusai import update_console_handler
     from sonusai.mixture import MixtureDatabase
     from sonusai.mixture import FeatureGeneratorConfig
@@ -457,8 +471,4 @@ def main() -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/post_spenh_targetf.py CHANGED Viewed

@@ -20,9 +20,20 @@ Outputs the following to post_spenh_targetf-<TIMESTAMP> directory:
     post_spenh_targetf.log
 """
+import signal
 from dataclasses import dataclass
-from sonusai import logger
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 @dataclass
@@ -146,8 +157,4 @@ def _process(file: str) -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        exit()
+    main()

sonusai/summarize_metric_spenh.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""sonusai summarize_metric_spenh
+usage: summarize_metric_spenh [-hr] [-s SORT] LOC
+options:
+    -h, --help
+    -s SORT, --sort SORT            Sort by SORT column. [default: MIXID]
+    -r, --reverse                   Reverse sort order.
+Summarize speech enhancement metrics results using data generated by SonusAI calc_metric_spenh.
+Inputs:
+    LOC         A SonusAI calc_metric_spenh results directory.
+"""
+import signal
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
+def summarize_metric_spenh(location: str, by: str = 'MIXID', reverse: bool = False) -> str:
+    import glob
+    import pandas as pd
+    files = sorted(glob.glob(location + '/*_metric_spenh.txt'))
+    need_header = True
+    header = ['MIXID']
+    data = []
+    for file in files:
+        with open(file, 'r') as f:
+            for i, line in enumerate(f):
+                if i == 1 and need_header:
+                    need_header = False
+                    header.extend(line.strip().split())
+                elif i == 2:
+                    data.append(line.strip().split())
+                    break
+    df = pd.DataFrame(data, columns=header)
+    df[header[0:-2]] = df[header[0:-2]].apply(pd.to_numeric, errors='coerce')
+    return df.sort_values(by=by, ascending=not reverse).to_string(index=False)
+def main():
+    from docopt import docopt
+    import sonusai
+    from sonusai.utils import trim_docstring
+    args = docopt(trim_docstring(__doc__), version=sonusai.__version__, options_first=True)
+    by = args['--sort']
+    reverse = args['--reverse']
+    location = args['LOC']
+    print(summarize_metric_spenh(location, by, reverse))
+if __name__ == '__main__':
+    main()

sonusai/tplot.py CHANGED Viewed

@@ -41,7 +41,19 @@ options:
    A multi-page plot TARGET-tplot.pdf or CONFIG-tplot.pdf is generated.
 """
-from sonusai import logger
+import signal
+def signal_handler(_sig, _frame):
+    import sys
+    from sonusai import logger
+    logger.info('Canceled due to keyboard interrupt')
+    sys.exit(1)
+signal.signal(signal.SIGINT, signal_handler)
 # TODO: re-work for modern mixdb API
@@ -328,8 +340,4 @@ def main() -> None:
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        logger.info('Canceled due to keyboard interrupt')
-        raise SystemExit(0)
+    main()

sonusai/utils/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # SonusAI general utilities
 from .asl_p56 import asl_p56
+from .asr import ASRData
 from .asr import ASRResult
 from .asr import calc_asr
 from .audio_devices import get_default_input_device
@@ -14,24 +15,21 @@ from .create_ts_name import create_ts_name
 from .dataclass_from_dict import dataclass_from_dict
 from .db import db_to_linear
 from .db import linear_to_db
+from .docstring import add_commands_to_docstring
+from .docstring import trim_docstring
 from .energy_f import compute_energy_f
 from .engineering_number import EngineeringNumber
 from .get_frames_per_batch import get_frames_per_batch
 from .get_label_names import get_label_names
 from .grouper import grouper
 from .human_readable_size import human_readable_size
-from .keras_utils import check_keras_overrides
-from .keras_utils import create_onnx_from_keras
-from .keras_utils import import_and_check_keras_model
-from .keras_utils import import_keras_model
-from .keras_utils import keras_onnx
 from .max_text_width import max_text_width
+from .model_utils import import_module
 from .numeric_conversion import float_to_int16
 from .numeric_conversion import int16_to_float
 from .onnx_utils import SonusAIMetaData
 from .onnx_utils import add_sonusai_metadata
 from .onnx_utils import get_sonusai_metadata
-from .onnx_utils import replace_stateful_grus
 from .parallel import pp_imap
 from .parallel import pp_tqdm_imap
 from .print_mixture_details import print_class_count
@@ -50,6 +48,5 @@ from .stacked_complex import stacked_complex_imag
 from .stacked_complex import stacked_complex_real
 from .stacked_complex import unstack_complex
 from .stratified_shuffle_split import stratified_shuffle_split_mixid
-from .trim_docstring import trim_docstring
 from .wave import write_wav
 from .yes_or_no import yes_or_no

sonusai/utils/asl_p56.py CHANGED Viewed

@@ -22,7 +22,7 @@ def asl_p56(audio: AudioT) -> float:
     # Hangover time in seconds
     H = 0.2
     # Rounded up to next integer
-    I = np.ceil(H * SAMPLE_RATE)
+    H_samples = np.ceil(H * SAMPLE_RATE)
     # Margin in dB, difference between threshold and active speech level
     M = 15.9
@@ -40,7 +40,7 @@ def asl_p56(audio: AudioT) -> float:
     a = np.full(thresh_num, -1)
     # Hangover counter for each threshold
-    h = np.full(thresh_num, I)
+    h = np.full(thresh_num, H_samples)
     # Long-term level square energy of audio
     sq = sum(np.square(audio))
@@ -55,7 +55,7 @@ def asl_p56(audio: AudioT) -> float:
             if q[k] >= c[j]:
                 a[j] = a[j] + 1
                 h[j] = 0
-            elif h[j] < I:
+            elif h[j] < H_samples:
                 a[j] = a[j] + 1
                 h[j] = h[j] + 1
             else:

sonusai 0.15.9__py3-none-any.whl → 0.16.1__py3-none-any.whl

sonusai 0.15.9__py3-none-any.whl → 0.16.1__py3-none-any.whl