sonusai 0.15.9__tar.gz → 0.16.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonusai-0.15.9 → sonusai-0.16.0}/PKG-INFO +7 -25
- {sonusai-0.15.9 → sonusai-0.16.0}/README.rst +5 -5
- {sonusai-0.15.9 → sonusai-0.16.0}/pyproject.toml +2 -25
- sonusai-0.16.0/sonusai/__init__.py +86 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/audiofe.py +6 -62
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/calc_metric_spenh.py +24 -15
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genmixdb.py +1 -1
- sonusai-0.16.0/sonusai/main.py +90 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/config.py +1 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mkmanifest.py +29 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/__init__.py +4 -7
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asl_p56.py +3 -3
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr.py +35 -8
- sonusai-0.16.0/sonusai/utils/asr_functions/__init__.py +1 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_functions/aaware_whisper.py +2 -2
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/__init__.py +1 -0
- sonusai-0.16.0/sonusai/utils/asr_manifest_functions/mcgill_speech.py +29 -0
- sonusai-0.15.9/sonusai/utils/trim_docstring.py → sonusai-0.16.0/sonusai/utils/docstring.py +20 -0
- sonusai-0.16.0/sonusai/utils/model_utils.py +30 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/onnx_utils.py +19 -45
- sonusai-0.15.9/sonusai/__init__.py +0 -55
- sonusai-0.15.9/sonusai/data_generator/__init__.py +0 -5
- sonusai-0.15.9/sonusai/data_generator/dataset_from_mixdb.py +0 -143
- sonusai-0.15.9/sonusai/data_generator/keras_from_mixdb.py +0 -169
- sonusai-0.15.9/sonusai/data_generator/torch_from_mixdb.py +0 -122
- sonusai-0.15.9/sonusai/keras_onnx.py +0 -86
- sonusai-0.15.9/sonusai/keras_predict.py +0 -231
- sonusai-0.15.9/sonusai/keras_train.py +0 -334
- sonusai-0.15.9/sonusai/main.py +0 -93
- sonusai-0.15.9/sonusai/torchl_onnx.py +0 -216
- sonusai-0.15.9/sonusai/torchl_predict.py +0 -542
- sonusai-0.15.9/sonusai/torchl_train.py +0 -223
- sonusai-0.15.9/sonusai/utils/asr_functions/__init__.py +0 -6
- sonusai-0.15.9/sonusai/utils/asr_functions/aixplain_whisper.py +0 -59
- sonusai-0.15.9/sonusai/utils/asr_functions/data.py +0 -16
- sonusai-0.15.9/sonusai/utils/asr_functions/deepgram.py +0 -97
- sonusai-0.15.9/sonusai/utils/asr_functions/fastwhisper.py +0 -90
- sonusai-0.15.9/sonusai/utils/asr_functions/google.py +0 -95
- sonusai-0.15.9/sonusai/utils/asr_functions/whisper.py +0 -49
- sonusai-0.15.9/sonusai/utils/keras_utils.py +0 -226
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/aawscd_probwrite.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/genmixdb.yml +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/speech_ma01_01.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/data/whitenoise.wav +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/doc.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genft.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/genmix.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/gentcst.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/lsdb.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_class_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_optimal_thresholds.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_pcm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_pesq.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_sa_sdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_sample_weights.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_wer.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/calc_wsdr.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/class_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/confusion_matrix_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/one_hot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/metrics/snr_summary.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/class_count.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/constants.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/datatypes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/eq_rule_is_valid.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/feature.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/generation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/helpers.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/log_duration_and_sizes.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/mapped_snr_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/mixdb.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/soundfile_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/sox_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/sox_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/spectral_mask.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/target_class_balancing.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/targets.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/tokenized_shell_vars.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/torchaudio_audio.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/torchaudio_augmentation.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/crm.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/energy.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/file.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/phoneme.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/sed.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mixture/truth_functions/target.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/mkwav.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/onnx_predict.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/plot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/post_spenh_targetf.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/queries/__init__.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/queries/queries.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/tplot.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/librispeech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/audio_devices.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/braced_glob.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/calculate_input_shape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/convert_string_to_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/create_timestamp.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/create_ts_name.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/dataclass_from_dict.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/db.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/energy_f.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/engineering_number.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/get_frames_per_batch.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/get_label_names.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/grouper.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/human_readable_size.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/max_text_width.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/numeric_conversion.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/parallel.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/print_mixture_details.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/ranges.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/read_mixture_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/read_predict_data.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/reshape.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/seconds_to_hms.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/stacked_complex.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/stratified_shuffle_split.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/wave.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/utils/yes_or_no.py +0 -0
- {sonusai-0.15.9 → sonusai-0.16.0}/sonusai/vars.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sonusai
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.16.0
|
4
4
|
Summary: Framework for building deep neural network models for sound, speech, and voice AI
|
5
5
|
Home-page: https://aaware.com
|
6
6
|
License: GPL-3.0-only
|
@@ -15,57 +15,39 @@ Classifier: Programming Language :: Python :: 3.9
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.10
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
17
17
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
18
|
-
Requires-Dist: aixplain (>=0.2.6,<0.3.0)
|
19
|
-
Requires-Dist: bitarray (>=2.9.2,<3.0.0)
|
20
|
-
Requires-Dist: ctranslate2 (==4.1.0)
|
21
18
|
Requires-Dist: dataclasses-json (>=0.6.1,<0.7.0)
|
22
|
-
Requires-Dist: deepgram-sdk (>=3.0.0,<4.0.0)
|
23
19
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
24
|
-
Requires-Dist: einops (>=0.7.0,<0.8.0)
|
25
|
-
Requires-Dist: faster-whisper (>=1.0.1,<2.0.0)
|
26
|
-
Requires-Dist: geomloss (>=0.2.6,<0.3.0)
|
27
20
|
Requires-Dist: h5py (>=3.11.0,<4.0.0)
|
28
|
-
Requires-Dist: hydra-core (>=1.3.2,<2.0.0)
|
29
21
|
Requires-Dist: jiwer (>=3.0.3,<4.0.0)
|
30
|
-
Requires-Dist: keras (>=3.1.1,<4.0.0)
|
31
|
-
Requires-Dist: keras-tuner (>=1.4.7,<2.0.0)
|
32
22
|
Requires-Dist: librosa (>=0.10.1,<0.11.0)
|
33
|
-
Requires-Dist: lightning (>=2.2,<2.3)
|
34
23
|
Requires-Dist: matplotlib (>=3.8.0,<4.0.0)
|
35
|
-
Requires-Dist: omegaconf (>=2.3.0,<3.0.0)
|
36
24
|
Requires-Dist: onnx (>=1.14.1,<2.0.0)
|
37
25
|
Requires-Dist: onnxruntime (>=1.16.1,<2.0.0)
|
38
26
|
Requires-Dist: paho-mqtt (>=2.0.0,<3.0.0)
|
39
27
|
Requires-Dist: pandas (>=2.1.1,<3.0.0)
|
40
28
|
Requires-Dist: pesq (>=0.0.4,<0.0.5)
|
41
|
-
Requires-Dist: pyaaware (>=1.5.
|
29
|
+
Requires-Dist: pyaaware (>=1.5.7,<2.0.0)
|
42
30
|
Requires-Dist: pyaudio (>=0.2.14,<0.3.0)
|
43
31
|
Requires-Dist: pydub (>=0.25.1,<0.26.0)
|
44
32
|
Requires-Dist: pystoi (>=0.4.0,<0.5.0)
|
45
|
-
Requires-Dist: python-magic (>=0.4.27,<0.5.0)
|
46
33
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
47
|
-
Requires-Dist: sacrebleu (>=2.4.2,<3.0.0)
|
48
34
|
Requires-Dist: samplerate (>=0.2.1,<0.3.0)
|
49
35
|
Requires-Dist: soundfile (>=0.12.1,<0.13.0)
|
50
36
|
Requires-Dist: sox (>=1.4.1,<2.0.0)
|
51
|
-
Requires-Dist: speechrecognition (>=3.10.1,<4.0.0)
|
52
|
-
Requires-Dist: tensorflow (>=2.15.0,<3.0.0)
|
53
|
-
Requires-Dist: tf2onnx (>=1.15.1,<2.0.0)
|
54
37
|
Requires-Dist: torch (>=2.2,<2.3)
|
55
38
|
Requires-Dist: torchaudio (>=2.2,<2.3)
|
56
|
-
Requires-Dist: torchinfo (>=1.8.0,<2.0.0)
|
57
39
|
Requires-Dist: tqdm (>=4.66.1,<5.0.0)
|
58
40
|
Description-Content-Type: text/x-rst
|
59
41
|
|
60
|
-
|
42
|
+
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|
61
43
|
|
62
|
-
|
44
|
+
SonusAI includes functions for pre-processing training and validation data and
|
63
45
|
creating performance metrics reports for key types of Keras models:
|
64
46
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
65
47
|
- binary, multiclass single-label, multiclass multi-label, and regression
|
66
48
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
67
49
|
|
68
|
-
|
69
|
-
- Aaware Inc. sonusai
|
70
|
-
- Keras model scripts: User python scripts for
|
50
|
+
SonusAI python functions are used by:
|
51
|
+
- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
|
52
|
+
- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
|
71
53
|
|
@@ -1,11 +1,11 @@
|
|
1
|
-
|
1
|
+
SonusAI: Framework for simplified creation of deep NN models for sound, speech, and voice AI
|
2
2
|
|
3
|
-
|
3
|
+
SonusAI includes functions for pre-processing training and validation data and
|
4
4
|
creating performance metrics reports for key types of Keras models:
|
5
5
|
- recurrent, convolutional, or a combination (i.e. RCNNs)
|
6
6
|
- binary, multiclass single-label, multiclass multi-label, and regression
|
7
7
|
- training with data augmentations: noise mixing, pitch and time stretch, etc.
|
8
8
|
|
9
|
-
|
10
|
-
- Aaware Inc. sonusai
|
11
|
-
- Keras model scripts: User python scripts for
|
9
|
+
SonusAI python functions are used by:
|
10
|
+
- Aaware Inc. sonusai framework: Easily create train/validation data, run prediction, evaluate model performance
|
11
|
+
- Keras model scripts: User python scripts for Keras model creation, training, and prediction. These can use sonusai-specific data but also some general useful utilities for training rnn-based models like CRNN's, DSCRNN's, etc. in Keras.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "sonusai"
|
3
|
-
version = "0.
|
3
|
+
version = "0.16.0"
|
4
4
|
description = "Framework for building deep neural network models for sound, speech, and voice AI"
|
5
5
|
authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
6
6
|
maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
|
@@ -15,60 +15,37 @@ aawscd_probwrite = 'sonusai.aawscd_probwrite:main'
|
|
15
15
|
|
16
16
|
[tool.poetry.dependencies]
|
17
17
|
PyYAML = "^6.0.1"
|
18
|
-
aixplain = "^0.2.6"
|
19
|
-
bitarray = "^2.9.2"
|
20
|
-
ctranslate2 = "4.1.0"
|
21
18
|
dataclasses-json = "^0.6.1"
|
22
|
-
deepgram-sdk = "^3.0.0"
|
23
19
|
docopt = "^0.6.2"
|
24
|
-
einops = "^0.7.0"
|
25
|
-
faster-whisper = "^1.0.1"
|
26
|
-
geomloss = "^0.2.6"
|
27
20
|
h5py = "^3.11.0"
|
28
|
-
hydra-core = "^1.3.2"
|
29
21
|
jiwer = "^3.0.3"
|
30
|
-
keras = "^3.1.1"
|
31
|
-
keras-tuner = "^1.4.7"
|
32
22
|
librosa = "^0.10.1"
|
33
|
-
lightning = "~2.2"
|
34
23
|
matplotlib = "^3.8.0"
|
35
|
-
omegaconf = "^2.3.0"
|
36
24
|
onnx = "^1.14.1"
|
37
|
-
#onnxruntime-gpu = "^1.16.1"
|
38
25
|
onnxruntime = "^1.16.1"
|
39
|
-
#openai-whisper = "^20231117"
|
40
26
|
paho-mqtt = "^2.0.0"
|
41
27
|
pandas = "^2.1.1"
|
42
28
|
pesq = "^0.0.4"
|
43
|
-
pyaaware = "^1.5.
|
29
|
+
pyaaware = "^1.5.7"
|
44
30
|
pyaudio = "^0.2.14"
|
45
31
|
pydub = "^0.25.1"
|
46
32
|
pystoi = "^0.4.0"
|
47
33
|
python = ">=3.9,<3.12"
|
48
|
-
python-magic = "^0.4.27"
|
49
34
|
requests = "^2.31.0"
|
50
|
-
sacrebleu = "^2.4.2"
|
51
35
|
samplerate = "^0.2.1"
|
52
36
|
soundfile = "^0.12.1"
|
53
|
-
speechrecognition = "^3.10.1"
|
54
37
|
sox = "^1.4.1"
|
55
|
-
tensorflow = "^2.15.0"
|
56
|
-
tf2onnx = "^1.15.1"
|
57
38
|
torch = "~2.2"
|
58
39
|
torchaudio = "~2.2"
|
59
|
-
torchinfo = "^1.8.0"
|
60
40
|
tqdm = "^4.66.1"
|
61
41
|
|
62
42
|
[tool.poetry.group.dev.dependencies]
|
63
43
|
icecream = "^2.1.3"
|
64
|
-
ipython = "^8.16.1"
|
65
|
-
jupyter = "^1.0.0"
|
66
44
|
mypy = "^1.6.0"
|
67
45
|
mypy-extensions = "^1.0.0"
|
68
46
|
pytest = "^8.1.1"
|
69
47
|
types-pyyaml = "^6.0.12.12"
|
70
48
|
types-requests = "^2.31.0.8"
|
71
|
-
yappi = "^1.4.0"
|
72
49
|
|
73
50
|
[tool.mypy]
|
74
51
|
ignore_missing_imports = true
|
@@ -0,0 +1,86 @@
|
|
1
|
+
import logging
|
2
|
+
from importlib import metadata
|
3
|
+
from os.path import dirname
|
4
|
+
|
5
|
+
__version__ = metadata.version(__package__)
|
6
|
+
BASEDIR = dirname(__file__)
|
7
|
+
|
8
|
+
commands_doc = """
|
9
|
+
audiofe Audio front end
|
10
|
+
calc_metric_spenh Run speech enhancement and analysis
|
11
|
+
doc Documentation
|
12
|
+
genft Generate feature and truth data
|
13
|
+
genmix Generate mixture and truth data
|
14
|
+
genmixdb Generate a mixture database
|
15
|
+
gentcst Generate target configuration from a subdirectory tree
|
16
|
+
lsdb List information about a mixture database
|
17
|
+
mkmanifest Make ASR manifest JSON file
|
18
|
+
mkwav Make WAV files from a mixture database
|
19
|
+
onnx_predict Run ONNX predict on a trained model
|
20
|
+
plot Plot mixture data
|
21
|
+
post_spenh_targetf Run post-processing for speech enhancement targetf data
|
22
|
+
tplot Plot truth data
|
23
|
+
vars List custom SonusAI variables
|
24
|
+
"""
|
25
|
+
|
26
|
+
# create logger
|
27
|
+
logger = logging.getLogger('sonusai')
|
28
|
+
logger.setLevel(logging.DEBUG)
|
29
|
+
formatter = logging.Formatter('%(message)s')
|
30
|
+
console_handler = logging.StreamHandler()
|
31
|
+
console_handler.setLevel(logging.DEBUG)
|
32
|
+
console_handler.setFormatter(formatter)
|
33
|
+
logger.addHandler(console_handler)
|
34
|
+
|
35
|
+
|
36
|
+
class SonusAIError(Exception):
|
37
|
+
def __init__(self, value):
|
38
|
+
logger.error(value)
|
39
|
+
|
40
|
+
|
41
|
+
# create file handler
|
42
|
+
def create_file_handler(filename: str) -> None:
|
43
|
+
fh = logging.FileHandler(filename=filename, mode='w')
|
44
|
+
fh.setLevel(logging.DEBUG)
|
45
|
+
fh.setFormatter(formatter)
|
46
|
+
logger.addHandler(fh)
|
47
|
+
|
48
|
+
|
49
|
+
# update console handler
|
50
|
+
def update_console_handler(verbose: bool) -> None:
|
51
|
+
if not verbose:
|
52
|
+
logger.removeHandler(console_handler)
|
53
|
+
console_handler.setLevel(logging.INFO)
|
54
|
+
logger.addHandler(console_handler)
|
55
|
+
|
56
|
+
|
57
|
+
# write initial log message
|
58
|
+
def initial_log_messages(name: str, subprocess: str = None) -> None:
|
59
|
+
from datetime import datetime
|
60
|
+
from getpass import getuser
|
61
|
+
from os import getcwd
|
62
|
+
from socket import gethostname
|
63
|
+
from sys import argv
|
64
|
+
|
65
|
+
if subprocess is None:
|
66
|
+
logger.info(f'SonusAI {__version__}')
|
67
|
+
else:
|
68
|
+
logger.info(f'SonusAI {subprocess}')
|
69
|
+
logger.info(f'{name}')
|
70
|
+
logger.info('')
|
71
|
+
logger.debug(f'Host: {gethostname()}')
|
72
|
+
logger.debug(f'User: {getuser()}')
|
73
|
+
logger.debug(f'Directory: {getcwd()}')
|
74
|
+
logger.debug(f'Date: {datetime.now()}')
|
75
|
+
logger.debug(f'Command: {" ".join(argv)}')
|
76
|
+
logger.debug('')
|
77
|
+
|
78
|
+
|
79
|
+
def commands_list(doc: str = commands_doc) -> list[str]:
|
80
|
+
lines = doc.split('\n')
|
81
|
+
commands = []
|
82
|
+
for line in lines:
|
83
|
+
command = line.strip().split(' ').pop(0)
|
84
|
+
if command:
|
85
|
+
commands.append(command)
|
86
|
+
return commands
|
@@ -34,7 +34,6 @@ audiofe_<TIMESTAMP>.h5.
|
|
34
34
|
from os.path import exists
|
35
35
|
from select import select
|
36
36
|
from sys import stdin
|
37
|
-
from typing import Any
|
38
37
|
|
39
38
|
import h5py
|
40
39
|
import numpy as np
|
@@ -58,7 +57,7 @@ from sonusai.utils import calc_asr
|
|
58
57
|
from sonusai.utils import create_timestamp
|
59
58
|
from sonusai.utils import get_input_device_index_by_name
|
60
59
|
from sonusai.utils import get_input_devices
|
61
|
-
from sonusai.utils import
|
60
|
+
from sonusai.utils import load_torchl_ckpt_model
|
62
61
|
from sonusai.utils import trim_docstring
|
63
62
|
from sonusai.utils import write_wav
|
64
63
|
|
@@ -124,7 +123,8 @@ def main() -> None:
|
|
124
123
|
logger.info(f'Capture audio ASR: {capture_asr}')
|
125
124
|
|
126
125
|
if model_name is not None:
|
127
|
-
model =
|
126
|
+
model = load_torchl_ckpt_model(model_name=model_name, ckpt_name=ckpt_name)
|
127
|
+
model.eval()
|
128
128
|
|
129
129
|
feature = get_feature_from_audio(audio=capture_audio, feature_mode=model.hparams.feature)
|
130
130
|
if debug:
|
@@ -149,7 +149,8 @@ def main() -> None:
|
|
149
149
|
# logger.info(f'Wrote reconstruct audio with shape {reconstruct_audio.shape} to {h5_name}')
|
150
150
|
|
151
151
|
with torch.no_grad():
|
152
|
-
|
152
|
+
# model wants batch x timesteps x feature_parameters
|
153
|
+
predict = model(torch.tensor(feature).permute((1, 0, 2))).permute(1, 0, 2).numpy()
|
153
154
|
if debug:
|
154
155
|
with h5py.File(h5_name, 'a') as f:
|
155
156
|
if 'predict' in f:
|
@@ -157,7 +158,7 @@ def main() -> None:
|
|
157
158
|
f.create_dataset('predict', data=predict)
|
158
159
|
logger.info(f'Wrote predict with shape {predict.shape} to {h5_name}')
|
159
160
|
|
160
|
-
predict_audio = get_audio_from_feature(feature=predict
|
161
|
+
predict_audio = get_audio_from_feature(feature=predict, feature_mode=model.hparams.feature)
|
161
162
|
write_wav(predict_name, predict_audio, SAMPLE_RATE)
|
162
163
|
logger.info(f'Wrote predict audio with shape {predict_audio.shape} to {predict_name}')
|
163
164
|
if debug:
|
@@ -172,63 +173,6 @@ def main() -> None:
|
|
172
173
|
logger.info(f'Predict audio ASR: {predict_asr}')
|
173
174
|
|
174
175
|
|
175
|
-
def load_model(model_name: str, ckpt_name: str) -> Any:
|
176
|
-
batch_size = 1
|
177
|
-
timesteps = 0
|
178
|
-
|
179
|
-
# Load checkpoint first to get hparams if available
|
180
|
-
try:
|
181
|
-
checkpoint = torch.load(ckpt_name, map_location=lambda storage, loc: storage)
|
182
|
-
except Exception as e:
|
183
|
-
logger.exception(f'Error: could not load checkpoint from {ckpt_name}: {e}')
|
184
|
-
raise SystemExit(1)
|
185
|
-
|
186
|
-
# Import model definition file
|
187
|
-
logger.info(f'Importing {model_name}')
|
188
|
-
litemodule = import_keras_model(model_name)
|
189
|
-
|
190
|
-
if 'hyper_parameters' in checkpoint:
|
191
|
-
logger.info(f'Found checkpoint file with hyper-parameters')
|
192
|
-
hparams = checkpoint['hyper_parameters']
|
193
|
-
if hparams['batch_size'] != batch_size:
|
194
|
-
logger.info(
|
195
|
-
f'Overriding model default batch_size of {hparams["batch_size"]} with batch_size of {batch_size}')
|
196
|
-
hparams["batch_size"] = batch_size
|
197
|
-
|
198
|
-
if hparams['timesteps'] != 0 and timesteps == 0:
|
199
|
-
timesteps = hparams['timesteps']
|
200
|
-
logger.warning(f'Using model default timesteps of {timesteps}')
|
201
|
-
|
202
|
-
logger.info(f'Building model with {len(hparams)} total hparams')
|
203
|
-
try:
|
204
|
-
model = litemodule.MyHyperModel(**hparams)
|
205
|
-
except Exception as e:
|
206
|
-
logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
|
207
|
-
raise SystemExit(1)
|
208
|
-
else:
|
209
|
-
logger.info(f'Found checkpoint file with no hyper-parameters')
|
210
|
-
logger.info(f'Building model with defaults')
|
211
|
-
try:
|
212
|
-
tmp = litemodule.MyHyperModel()
|
213
|
-
except Exception as e:
|
214
|
-
logger.exception(f'Error: model build (MyHyperModel) in {model_name} failed: {e}')
|
215
|
-
raise SystemExit(1)
|
216
|
-
|
217
|
-
if tmp.batch_size != batch_size:
|
218
|
-
logger.info(f'Overriding model default batch_size of {tmp.batch_size} with batch_size of {batch_size}')
|
219
|
-
|
220
|
-
if tmp.timesteps != 0 and timesteps == 0:
|
221
|
-
timesteps = tmp.timesteps
|
222
|
-
logger.warning(f'Using model default timesteps of {timesteps}')
|
223
|
-
|
224
|
-
model = litemodule.MyHyperModel(timesteps=timesteps, batch_size=batch_size)
|
225
|
-
|
226
|
-
logger.info(f'Loading weights from {ckpt_name}')
|
227
|
-
model.load_state_dict(checkpoint["state_dict"])
|
228
|
-
model.eval()
|
229
|
-
return model
|
230
|
-
|
231
|
-
|
232
176
|
def get_frames_from_device(input_name: str | None, length: float, chunk: int = 1024) -> AudioT:
|
233
177
|
p = pyaudio.PyAudio()
|
234
178
|
|
@@ -758,13 +758,18 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
758
758
|
predict = stack_complex(predict)
|
759
759
|
|
760
760
|
# 2) Collect true target, noise, mixture data, trim to predict size if needed
|
761
|
-
|
762
|
-
target_f = mixdb.
|
763
|
-
|
764
|
-
|
765
|
-
|
761
|
+
tmp = mixdb.mixture_targets(mixid) # targets is list of pre-IR and pre-specaugment targets
|
762
|
+
target_f = mixdb.mixture_targets_f(mixid, targets=tmp)[0]
|
763
|
+
target = tmp[0]
|
764
|
+
mixture = mixdb.mixture_mixture(mixid) # note: gives full reverberated/distorted target, but no specaugment
|
765
|
+
# noise_wodist = mixdb.mixture_noise(mixid) # noise without specaugment and distortion
|
766
|
+
# noise_wodist_f = mixdb.mixture_noise_f(mixid, noise=noise_wodist)
|
767
|
+
noise = mixture - target # has time-domain distortion (ir,etc.) but does not have specaugment
|
768
|
+
# noise_f = mixdb.mixture_noise_f(mixid, noise=noise)
|
769
|
+
segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise) # note: uses pre-IR, pre-specaug audio
|
766
770
|
mixture_f = mixdb.mixture_mixture_f(mixid, mixture=mixture)
|
767
|
-
|
771
|
+
noise_f = mixture_f - target_f # true noise in freq domain includes specaugment and time-domain ir,distortions
|
772
|
+
# segsnr_f = mixdb.mixture_segsnr(mixid, target=target, noise=noise)
|
768
773
|
segsnr_f[segsnr_f == inf] = 7.944e8 # 99db
|
769
774
|
segsnr_f[segsnr_f == -inf] = 1.258e-10 # -99db
|
770
775
|
# need to use inv-tf to match #samples & latency shift properties of predict inv tf
|
@@ -920,8 +925,9 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
920
925
|
'NLERR': lerr_n_frame,
|
921
926
|
'SPD': phd_frame})
|
922
927
|
metr2 = metr2.describe() # Use pandas stat function
|
923
|
-
|
924
|
-
|
928
|
+
# Change SSNR stats to dB, except count. SSNR is index 0, pandas requires using iloc
|
929
|
+
# metr2['SSNR'][1:] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
|
930
|
+
metr2.iloc[1:, 0] = metr2['SSNR'][1:].apply(lambda x: 10 * np.log10(x + 1.01e-10))
|
925
931
|
# create a single row in multi-column header
|
926
932
|
new_labels = pd.MultiIndex.from_product([metr2.columns,
|
927
933
|
['Avg', 'Min', 'Med', 'Max', 'Std']],
|
@@ -978,11 +984,11 @@ def _process_mixture(mixid: int) -> tuple[pd.DataFrame, pd.DataFrame]:
|
|
978
984
|
plot_fname = base_name + '_metric_spenh.pdf'
|
979
985
|
|
980
986
|
# Reshape feature to eliminate overlap redundancy for easier to understand spectrogram view
|
981
|
-
# Original size (frames, stride,
|
982
|
-
# Reshape to get frames*decimated_stride,
|
987
|
+
# Original size (frames, stride, num_bands), decimates in stride dimension only if step is > 1
|
988
|
+
# Reshape to get frames*decimated_stride, num_bands
|
983
989
|
step = int(mixdb.feature_samples / mixdb.feature_step_samples)
|
984
990
|
if feature.ndim != 3:
|
985
|
-
raise SonusAIError(f'feature does not have 3 dimensions: frames, stride,
|
991
|
+
raise SonusAIError(f'feature does not have 3 dimensions: frames, stride, num_bands')
|
986
992
|
|
987
993
|
# for feature cn*00n**
|
988
994
|
feat_sgram = unstack_complex(feature)
|
@@ -1166,7 +1172,7 @@ def main():
|
|
1166
1172
|
# Individual mixtures use pandas print, set precision to 2 decimal places
|
1167
1173
|
# pd.set_option('float_format', '{:.2f}'.format)
|
1168
1174
|
progress = tqdm(total=len(mixids), desc='calc_metric_spenh')
|
1169
|
-
all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=
|
1175
|
+
all_metrics_tables = pp_tqdm_imap(_process_mixture, mixids, progress=progress, num_cpus=8)
|
1170
1176
|
progress.close()
|
1171
1177
|
|
1172
1178
|
all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])
|
@@ -1192,6 +1198,7 @@ def main():
|
|
1192
1198
|
if ~np.isnan(tmp.iloc[0].to_numpy()[0]).any():
|
1193
1199
|
mtab_snr_summary_em = pd.concat([mtab_snr_summary_em, tmp])
|
1194
1200
|
|
1201
|
+
mtab_snr_summary = mtab_snr_summary.sort_values(by=['MXSNR'], ascending=False)
|
1195
1202
|
# Correct percentages in snr summary table
|
1196
1203
|
mtab_snr_summary['PESQi%'] = 100 * (mtab_snr_summary['PESQ'] - mtab_snr_summary['MXPESQ']) / np.maximum(
|
1197
1204
|
mtab_snr_summary['MXPESQ'], 0.01)
|
@@ -1202,9 +1209,11 @@ def main():
|
|
1202
1209
|
else:
|
1203
1210
|
mtab_snr_summary['WERi%'].iloc[i] = -999.0
|
1204
1211
|
else:
|
1205
|
-
mtab_snr_summary['
|
1206
|
-
|
1207
|
-
|
1212
|
+
if ~np.isnan(mtab_snr_summary['WER'].iloc[i]) and ~np.isnan(mtab_snr_summary['MXWER'].iloc[i]):
|
1213
|
+
# update WERi% in 6th col
|
1214
|
+
mtab_snr_summary.iloc[i, 6] = 100 * (mtab_snr_summary['MXWER'].iloc[i] -
|
1215
|
+
mtab_snr_summary['WER'].iloc[i]) / \
|
1216
|
+
mtab_snr_summary['MXWER'].iloc[i]
|
1208
1217
|
|
1209
1218
|
# Calculate avg metrics over all mixtures except -99
|
1210
1219
|
all_mtab1_sorted_nom99 = all_mtab1_sorted[all_mtab1_sorted.MXSNR != -99]
|
@@ -225,7 +225,7 @@ def genmixdb(location: str,
|
|
225
225
|
if logging:
|
226
226
|
logger.info('Collecting impulse responses')
|
227
227
|
|
228
|
-
impulse_response_files = get_impulse_response_files(config
|
228
|
+
impulse_response_files = get_impulse_response_files(config)
|
229
229
|
|
230
230
|
populate_impulse_response_file_table(location, impulse_response_files, test)
|
231
231
|
|
@@ -0,0 +1,90 @@
|
|
1
|
+
"""sonusai
|
2
|
+
|
3
|
+
usage: sonusai [--version] [--help] <command> [<args>...]
|
4
|
+
|
5
|
+
The sonusai commands are:
|
6
|
+
<This information is automatically generated.>
|
7
|
+
|
8
|
+
Aaware Sound and Voice Machine Learning Framework. See 'sonusai help <command>'
|
9
|
+
for more information on a specific command.
|
10
|
+
|
11
|
+
"""
|
12
|
+
import signal
|
13
|
+
|
14
|
+
|
15
|
+
def signal_handler(_sig, _frame):
|
16
|
+
import sys
|
17
|
+
|
18
|
+
from sonusai import logger
|
19
|
+
|
20
|
+
logger.info('Canceled due to keyboard interrupt')
|
21
|
+
sys.exit(1)
|
22
|
+
|
23
|
+
|
24
|
+
signal.signal(signal.SIGINT, signal_handler)
|
25
|
+
|
26
|
+
|
27
|
+
def main() -> None:
    """SonusAI command-line dispatcher.

    Discovers installed plugin packages (top-level modules named 'sonusai_*',
    excluding 'sonusai_asr_*'), merges their command documentation into the
    usage text, then dispatches <command> to the matching script in a
    subprocess, exiting with that subprocess's return code.
    """
    from importlib import import_module
    from pkgutil import iter_modules

    from sonusai import commands_list

    # Discover plugins: each plugin module exposes a commands_doc string and a
    # BASEDIR directory containing one script per command.
    plugins = {}
    plugin_docstrings = []
    for _, name, _ in iter_modules():
        if name.startswith('sonusai_') and not name.startswith('sonusai_asr_'):
            module = import_module(name)
            plugins[name] = {
                'commands': commands_list(module.commands_doc),
                'basedir': module.BASEDIR,
            }
            plugin_docstrings.append(module.commands_doc)

    from docopt import docopt

    from sonusai import __version__
    from sonusai.utils import add_commands_to_docstring
    from sonusai.utils import trim_docstring

    # options_first=True stops option parsing at <command> so the command's own
    # options are passed through untouched in <args>.
    args = docopt(trim_docstring(add_commands_to_docstring(__doc__, plugin_docstrings)),
                  version=__version__,
                  options_first=True)

    command = args['<command>']
    argv = args['<args>']

    import sys
    from os.path import join
    from subprocess import call

    import sonusai
    from sonusai import logger

    base_commands = sonusai.commands_list()

    if command == 'help':
        if not argv:
            sys.exit(call(['sonusai', '-h']))
        if argv[0] in base_commands:
            sys.exit(call(['python', f'{join(sonusai.BASEDIR, argv[0])}.py', '-h']))

        for data in plugins.values():
            if argv[0] in data['commands']:
                sys.exit(call(['python', f'{join(data["basedir"], argv[0])}.py', '-h']))

        logger.error(f"{argv[0]} is not a SonusAI command. See 'sonusai help'.")
        sys.exit(1)

    if command in base_commands:
        sys.exit(call(['python', f'{join(sonusai.BASEDIR, command)}.py'] + argv))

    for data in plugins.values():
        if command in data['commands']:
            sys.exit(call(['python', f'{join(data["basedir"], command)}.py'] + argv))

    logger.error(f"{command} is not a SonusAI command. See 'sonusai help'.")
    sys.exit(1)
|
87
|
+
|
88
|
+
|
89
|
+
if __name__ == '__main__':
|
90
|
+
main()
|
@@ -108,6 +108,7 @@ from .helpers import get_transform_from_audio
|
|
108
108
|
from .helpers import get_truth_t
|
109
109
|
from .helpers import inverse_transform
|
110
110
|
from .helpers import mixture_metadata
|
111
|
+
from .helpers import read_mixture_data
|
111
112
|
from .helpers import write_mixture_data
|
112
113
|
from .helpers import write_mixture_metadata
|
113
114
|
from .log_duration_and_sizes import log_duration_and_sizes
|
@@ -480,11 +480,10 @@ def append_noise_files(entry: dict | str, tokens: dict = None) -> list[dict]:
|
|
480
480
|
return noise_files
|
481
481
|
|
482
482
|
|
483
|
-
def get_impulse_response_files(config: dict
|
483
|
+
def get_impulse_response_files(config: dict) -> ImpulseResponseFiles:
|
484
484
|
"""Get the list of impulse response files from a config
|
485
485
|
|
486
486
|
:param config: Config dictionary
|
487
|
-
:param show_progress: Show progress bar
|
488
487
|
:return: List of impulse response files
|
489
488
|
"""
|
490
489
|
from itertools import chain
|
@@ -30,6 +30,8 @@ Inputs:
|
|
30
30
|
- 'librispeech'
|
31
31
|
- 'vctk_noisy_speech' expects subdirs named like <name>_wav/ and <name>_txt/ with files in
|
32
32
|
each using same basename, but with .wav and .txt respectively.
|
33
|
+
- 'mcgill-speech' expects audio data in basename/speakerid/speakerid-promptid.wav and
|
34
|
+
transcript data in Scripts/HarvardLists.dat
|
33
35
|
ADAT Audio data environment variable. All found files will be expanded to their full, absolute path and
|
34
36
|
then parts of the path that match the specified environment variable value will be replaced with
|
35
37
|
the variable. This accommodates portability across platforms where the sound datasets may in
|
@@ -42,11 +44,11 @@ Outputs the following to the current directory:
|
|
42
44
|
|
43
45
|
Example usage for LibriSpeech:
|
44
46
|
sonusai mkmanifest -mlibrispeech -eADAT -oasr_manifest.json --include='*.flac' train-clean-100
|
45
|
-
|
47
|
+
sonusai mkmanifest -m mcgill-speech -e ADAT -o asr_manifest_16k.json 16k-LP7/
|
46
48
|
"""
|
47
49
|
from sonusai import logger
|
48
50
|
|
49
|
-
VALID_METHOD = ['librispeech', 'vctk_noisy_speech']
|
51
|
+
VALID_METHOD = ['librispeech', 'vctk_noisy_speech', 'mcgill-speech']
|
50
52
|
|
51
53
|
|
52
54
|
def main() -> None:
|
@@ -88,6 +90,7 @@ def main() -> None:
|
|
88
90
|
from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
|
89
91
|
from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry
|
90
92
|
from sonusai.utils.asr_manifest_functions import get_vctk_noisy_speech_manifest_entry
|
93
|
+
from sonusai.utils.asr_manifest_functions import get_mcgill_speech_manifest_entry
|
91
94
|
|
92
95
|
start_time = time.monotonic()
|
93
96
|
|
@@ -160,6 +163,30 @@ def main() -> None:
|
|
160
163
|
for result in results:
|
161
164
|
f.write(json.dumps(result) + '\n')
|
162
165
|
|
166
|
+
if method == 'mcgill-speech':
|
167
|
+
logger.info(f'Found {len(entries)} Mcgill Speech files, opening prompt file ...')
|
168
|
+
# Note expecting only one path pointing to data subdir
|
169
|
+
if len(paths) != 1:
|
170
|
+
raise SonusAIError(f'mcgill-speech only support a single path')
|
171
|
+
prompt_fpath = join(join(realpath(abspath(paths[0]))), '../Scripts/HarvardList.dat')
|
172
|
+
with open(prompt_fpath, encoding='utf-8') as f:
|
173
|
+
lines = f.readlines()
|
174
|
+
|
175
|
+
logger.info(f'Found {len(lines) - 4} entries in prompt file.')
|
176
|
+
# First 4 lines are header stuff, can use remaining directly with simple lookup
|
177
|
+
# example line: '01_02:Glue the sheet ...\n' (paragraph 1, sentence 2)
|
178
|
+
# 11 entries per group, so getting line is 11*(p1-1)+(s2-1)
|
179
|
+
lines = lines[4:]
|
180
|
+
|
181
|
+
processing_func = partial(get_mcgill_speech_manifest_entry, transcript_data=lines)
|
182
|
+
progress = tqdm(total=len(entries), desc='Creating Mcgill Speech manifest data')
|
183
|
+
results = pp_tqdm_imap(processing_func, entries, progress=progress)
|
184
|
+
progress.close()
|
185
|
+
|
186
|
+
with open(output, 'w') as f:
|
187
|
+
for result in results:
|
188
|
+
f.write(json.dumps(result) + '\n')
|
189
|
+
|
163
190
|
end_time = time.monotonic()
|
164
191
|
logger.info('')
|
165
192
|
logger.info(f'Completed in {seconds_to_hms(seconds=end_time - start_time)}')
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# SonusAI general utilities
|
2
2
|
from .asl_p56 import asl_p56
|
3
|
+
from .asr import ASRData
|
3
4
|
from .asr import ASRResult
|
4
5
|
from .asr import calc_asr
|
5
6
|
from .audio_devices import get_default_input_device
|
@@ -14,24 +15,21 @@ from .create_ts_name import create_ts_name
|
|
14
15
|
from .dataclass_from_dict import dataclass_from_dict
|
15
16
|
from .db import db_to_linear
|
16
17
|
from .db import linear_to_db
|
18
|
+
from .docstring import add_commands_to_docstring
|
19
|
+
from .docstring import trim_docstring
|
17
20
|
from .energy_f import compute_energy_f
|
18
21
|
from .engineering_number import EngineeringNumber
|
19
22
|
from .get_frames_per_batch import get_frames_per_batch
|
20
23
|
from .get_label_names import get_label_names
|
21
24
|
from .grouper import grouper
|
22
25
|
from .human_readable_size import human_readable_size
|
23
|
-
from .keras_utils import check_keras_overrides
|
24
|
-
from .keras_utils import create_onnx_from_keras
|
25
|
-
from .keras_utils import import_and_check_keras_model
|
26
|
-
from .keras_utils import import_keras_model
|
27
|
-
from .keras_utils import keras_onnx
|
28
26
|
from .max_text_width import max_text_width
|
27
|
+
from .model_utils import import_module
|
29
28
|
from .numeric_conversion import float_to_int16
|
30
29
|
from .numeric_conversion import int16_to_float
|
31
30
|
from .onnx_utils import SonusAIMetaData
|
32
31
|
from .onnx_utils import add_sonusai_metadata
|
33
32
|
from .onnx_utils import get_sonusai_metadata
|
34
|
-
from .onnx_utils import replace_stateful_grus
|
35
33
|
from .parallel import pp_imap
|
36
34
|
from .parallel import pp_tqdm_imap
|
37
35
|
from .print_mixture_details import print_class_count
|
@@ -50,6 +48,5 @@ from .stacked_complex import stacked_complex_imag
|
|
50
48
|
from .stacked_complex import stacked_complex_real
|
51
49
|
from .stacked_complex import unstack_complex
|
52
50
|
from .stratified_shuffle_split import stratified_shuffle_split_mixid
|
53
|
-
from .trim_docstring import trim_docstring
|
54
51
|
from .wave import write_wav
|
55
52
|
from .yes_or_no import yes_or_no
|