PyPI - sonusai - Versions diffs - 0.12.5__tar.gz → 0.12.7__tar.gz - Mend

sonusai 0.12.5__tar.gz → 0.12.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

{sonusai-0.12.5 → sonusai-0.12.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonusai
-Version: 0.12.5
+Version: 0.12.7
 Summary: Framework for building deep neural network models for sound, speech, and voice AI
 Home-page: https://aaware.com
 License: GPL-3.0-only
@@ -29,7 +29,7 @@ Requires-Dist: paho-mqtt (>=1.6.1,<2.0.0)
 Requires-Dist: pandas (>=2.0.3,<3.0.0)
 Requires-Dist: pesq (>=0.0.4,<0.0.5)
 Requires-Dist: protobuf (>=4.24.0,<5.0.0)
-Requires-Dist: pyaaware (>=1.4.18,<2.0.0)
+Requires-Dist: pyaaware (>=1.4.19,<2.0.0)
 Requires-Dist: python-magic (>=0.4.27,<0.5.0)
 Requires-Dist: scikit-learn (>=1.3.0,<2.0.0)
 Requires-Dist: sh (>=1.14.3,<2.0.0)

{sonusai-0.12.5 → sonusai-0.12.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sonusai"
-version = "0.12.5"
+version = "0.12.7"
 description = "Framework for building deep neural network models for sound, speech, and voice AI"
 authors = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
 maintainers = ["Chris Eddington <chris@aaware.com>", "Jason Calderwood <jason@aaware.com>"]
@@ -31,7 +31,7 @@ paho-mqtt = "^1.6.1"
 pandas = "^2.0.3"
 pesq = "^0.0.4"
 protobuf = "^4.24.0"
-pyaaware = "^1.4.18"
+pyaaware = "^1.4.19"
 python = ">=3.8,<3.11"
 python-magic = "^0.4.27"
 scikit-learn = "^1.3.0"

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/calc_metric_spenh.py RENAMED Viewed

@@ -710,7 +710,7 @@ def _process_mixture(mixid: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
     return all_metrics_table_1, all_metrics_table_2
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai
@@ -808,7 +808,7 @@ def main():
     # Individual mixtures use pandas print, set precision to 2 decimal places
     # pd.set_option('float_format', '{:.2f}'.format)
     progress = tqdm(total=len(mixids))
-    all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress)
+    all_metrics_tables = p_tqdm_map(_process_mixture, mixids, progress=progress, chunksize=10)
     progress.close()
     all_metrics_table_1 = pd.concat([item[0] for item in all_metrics_tables])

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/data_generator/keras_from_mixdb.py RENAMED Viewed

@@ -65,9 +65,9 @@ class KerasFromMixtureDatabase(Sequence):
         self.stride = self.mixdb.fg.stride
         self.num_bands = self.mixdb.fg.num_bands
         self.num_classes = self.mixdb.num_classes
-        self.mixture_frame_segments = None
-        self.batch_frame_segments = None
-        self.total_batches: Optional[int]
+        self.mixture_frame_segments: Optional[int] = None
+        self.batch_frame_segments: Optional[int] = None
+        self.total_batches: Optional[int] = None
         self._initialize_mixtures()

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/evaluate.py RENAMED Viewed

@@ -186,7 +186,7 @@ def evaluate(mixdb: MixtureDatabase,
             classdf.round(3).to_csv(join(output_dir, f'class_snr{snri}.csv'))
-def main():
+def main() -> None:
     from datetime import datetime
     from os import mkdir
     from os.path import join

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genft.py RENAMED Viewed

@@ -72,7 +72,7 @@ def genft(mixdb: MixtureDatabase,
             results.append(_genft_kernel(mixid))
     else:
         progress = tqdm(total=len(mixids), disable=not show_progress)
-        results = p_tqdm_map(_genft_kernel, mixids, progress=progress)
+        results = p_tqdm_map(_genft_kernel, mixids, progress=progress, chunksize=10)
         progress.close()
     return results
@@ -100,7 +100,7 @@ def _genft_kernel(mixid: int) -> GenFTData:
     return GenFTData(feature=feature, truth_f=truth_f, segsnr=segsnr)
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmix.py RENAMED Viewed

@@ -79,7 +79,7 @@ def genmix(mixdb: MixtureDatabase,
             results.append(_genmix_kernel(mixid))
     else:
         progress = tqdm(total=len(mixids), disable=not show_progress)
-        results = p_tqdm_map(_genmix_kernel, mixids, progress=progress)
+        results = p_tqdm_map(_genmix_kernel, mixids, progress=progress, chunksize=10)
         progress.close()
     return results
@@ -128,7 +128,7 @@ def _genmix_kernel(mixid: int) -> GenMixData:
                       segsnr_t=segsnr_t)
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/genmixdb.py RENAMED Viewed

@@ -141,7 +141,6 @@ def genmixdb(location: Location,
              show_progress: bool = False,
              test_mode: bool = False,
              use_cache: bool = True) -> MixtureDatabase:
-    import itertools
     from random import seed
     import yaml
@@ -155,7 +154,6 @@ def genmixdb(location: Location,
     from sonusai.mixture import SAMPLE_RATE
     from sonusai.mixture import TruthSettings
     from sonusai.mixture import balance_targets
-    from sonusai.mixture import estimate_augmented_length_from_audio
     from sonusai.mixture import generate_mixtures
     from sonusai.mixture import get_augmentation_indices_for_mixup
     from sonusai.mixture import get_augmentations
@@ -286,19 +284,11 @@ def genmixdb(location: Location,
     augmented_targets = balance_targets(mixdb, augmented_targets)
-    augmented_noise_iter = list(itertools.product(*[range(len(mixdb.noises)), range(len(mixdb.noise_augmentations))]))
-    noise_audio_samples = sum([mixdb.augmented_noise_length(nfi, nai) for nfi, nai in augmented_noise_iter])
-    if logging:
-        logger.info('Generating mixtures first pass')
-    used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
-                                                             augmented_targets=augmented_targets,
-                                                             noise_files=noise_files,
-                                                             noise_augmentations=noise_augmentations,
-                                                             mixups=mixups,
-                                                             show_progress=show_progress)
     total_noise_files = len(mixdb.noises) * len(mixdb.noise_augmentations)
+    aug_noise_audio_samples = mixdb.augmented_noise_samples
+    total_target_files = len(augmented_targets)
+    aug_target_audio_samples = mixdb.augmented_target_samples
     if logging:
         raw_target_audio_samples = sum([targets.samples for targets in mixdb.targets])
@@ -312,24 +302,19 @@ def genmixdb(location: Location,
                     f'{human_readable_size(raw_noise_audio_duration * SAMPLE_RATE * SAMPLE_BYTES, 1)}, '
                     f'{seconds_to_hms(seconds=raw_noise_audio_duration)}')
-        augmented_noise_audio_samples = float(sum([mixdb.augmented_noise_length(f, a) for f, a in
-                                                   zip(range(len(mixdb.noises)),
-                                                       range(len(mixdb.noise_augmentations)))]))
-        augmented_target_audio_samples = 0
-        for augmented_target in augmented_targets:
-            augmented_target_audio_samples += estimate_augmented_length_from_audio(
-                audio=mixdb.raw_target_audio(augmented_target.target_file_index),
-                augmentation=mixdb.target_augmentations[augmented_target.target_augmentation_index],
-                length_common_denominator=mixdb.feature_step_samples)
         logger.info('')
-        logger.info(f'Augmented target audio: {len(augmented_targets)} files, '
-                    f'{human_readable_size(augmented_target_audio_samples * SAMPLE_BYTES, 1)}, '
-                    f'{seconds_to_hms(seconds=augmented_target_audio_samples / SAMPLE_RATE)}')
+        logger.info(f'Augmented target audio: {total_target_files} files, '
+                    f'{human_readable_size(aug_target_audio_samples * SAMPLE_BYTES, 1)}, '
+                    f'{seconds_to_hms(seconds=aug_target_audio_samples / SAMPLE_RATE)}')
         logger.info(f'Augmented noise audio: {total_noise_files} files, '
-                    f'{human_readable_size(augmented_noise_audio_samples * SAMPLE_BYTES, 1)}, '
-                    f'{seconds_to_hms(seconds=augmented_noise_audio_samples / SAMPLE_RATE)}')
+                    f'{human_readable_size(aug_noise_audio_samples * SAMPLE_BYTES, 1)}, '
+                    f'{seconds_to_hms(seconds=aug_noise_audio_samples / SAMPLE_RATE)}')
+    used_noise_files, used_noise_samples = generate_mixtures(mixdb=mixdb,
+                                                             augmented_targets=augmented_targets,
+                                                             noise_files=noise_files,
+                                                             noise_augmentations=noise_augmentations,
+                                                             mixups=mixups)
     total_mixtures = len(mixdb.mixtures)
     if logging:
         logger.info('')
@@ -358,9 +343,9 @@ def genmixdb(location: Location,
     MP_GLOBAL.save_segsnr = save_segsnr
     if logging:
-        logger.info('Generating mixtures second pass')
+        logger.info('Generating mixtures')
     progress = tqdm(total=total_mixtures, disable=not show_progress)
-    mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress)
+    mixdb.mixtures = p_tqdm_map(_process_mixture, range(total_mixtures), progress=progress, chunksize=10)
     progress.close()
     total_samples = mixdb.total_samples()
@@ -374,7 +359,7 @@ def genmixdb(location: Location,
                                stride=mixdb.fg.stride,
                                desc='Actual')
         noise_files_percent = (float(used_noise_files) / float(total_noise_files)) * 100
-        noise_samples_percent = (float(used_noise_samples) / float(noise_audio_samples)) * 100
+        noise_samples_percent = (float(used_noise_samples) / float(aug_noise_audio_samples)) * 100
         logger.info('')
         logger.info(f'Used {noise_files_percent:,.0f}% of augmented noise files')
         logger.info(f'Used {noise_samples_percent:,.0f}% of augmented noise audio')
@@ -431,7 +416,7 @@ def _process_mixture(mixid: int) -> MRecord:
     return mrecord
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/gentcst.py RENAMED Viewed

@@ -579,7 +579,7 @@ def report_leaf_fold_data_usage(all_files: List[FileInfo], use_files: List[FileI
         logger.warning('')
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_onnx.py RENAMED Viewed

@@ -30,7 +30,7 @@ Results are written into subdirectory <MODEL>-<TIMESTAMP> unless OUTPUT is speci
 from sonusai import logger
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_predict.py RENAMED Viewed

@@ -40,7 +40,7 @@ from sonusai.mixture import Feature
 from sonusai.mixture import Predict
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/keras_train.py RENAMED Viewed

@@ -80,7 +80,7 @@ class SonusAIModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
                 f.attrs['sonusai_num_classes'] = str(self.num_classes)
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai
@@ -285,9 +285,9 @@ def main():
     # Find checkpoint file and load weights for prediction and model save
     checkpoint_name = None
     for path, dirs, files in walk(output_dir):
-        for f in files:
-            if "ckpt" in f:
-                checkpoint_name = f
+        for file in files:
+            if "ckpt" in file:
+                checkpoint_name = file
     if checkpoint_name is not None:
         logger.info('Using best checkpoint for prediction and model exports')

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/lsdb.py RENAMED Viewed

@@ -109,7 +109,7 @@ def lsdb(mixdb: MixtureDatabase,
             logger.info(f'  {snr_mean[c]:8.2f}  {snr_std[c]:8.2f}  {snr_db_mean[c]:8.2f}  {snr_db_std[c]:8.2f}')
-def main():
+def main() -> None:
     from docopt import docopt
     import sonusai

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/main.py RENAMED Viewed

@@ -4,7 +4,6 @@ usage: sonusai [--version] [--help] <command> [<args>...]
 The sonusai commands are:
    calc_metric_spenh            Run speech enhancement and analysis
-   calc_metric_spenh_targetf    Run speech enhancement and analysis for targetf truth (deprecated)
    evaluate                     Evaluate model performance
    genft                        Generate feature and truth data
    genmix                       Generate mixture and truth data
@@ -14,6 +13,7 @@ The sonusai commands are:
    keras_train                  Train a model using Keras
    keras_onnx                   Convert a trained Keras model to ONNX
    lsdb                         List information about a mixture database
+   mkmanifest                   Make ASR manifest JSON file
    mkwav                        Make WAV files from a mixture database
    onnx_predict                 Run ONNX predict on a trained model
    plot                         Plot mixture data
@@ -30,18 +30,14 @@ for more information on a specific command.
 from sonusai import logger
-def main():
-    from subprocess import call
+def main() -> None:
     from docopt import docopt
     import sonusai
-    from sonusai import SonusAIError
     from sonusai.utils import trim_docstring
     commands = (
         'calc_metric_spenh',
-        'calc_metric_spenh_targetf',
         'evaluate',
         'genft',
         'genmix',
@@ -51,6 +47,7 @@ def main():
         'keras_train',
         'keras_onnx',
         'lsdb',
+        'mkmanifest',
         'mkwav',
         'onnx_predict',
         'plot',
@@ -66,6 +63,11 @@ def main():
     command = args['<command>']
     argv = args['<args>']
+    from subprocess import call
+    import sonusai
+    from sonusai import SonusAIError
     if command == 'help':
         if not argv:
             exit(call(['sonusai', '-h']))

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 # SonusAI mixture utilities
-from sonusai.mixture.audio import AawareSoxTransformer
+from sonusai.mixture.audio import Transformer
 from sonusai.mixture.audio import calculate_audio_from_transform
 from sonusai.mixture.audio import calculate_transform_from_audio
 from sonusai.mixture.audio import get_duration

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/audio.py RENAMED Viewed

@@ -97,7 +97,6 @@ def read_audio(name: Location) -> AudioT:
     from typing import Any
     import numpy as np
-    import sox
     from sonusai import SonusAIError
     from sonusai.mixture import BIT_DEPTH
@@ -196,10 +195,16 @@ def get_duration(audio: AudioT) -> float:
     return len(audio) / SAMPLE_RATE
-class AawareSoxTransformer(sox.Transformer):
+class Transformer(sox.Transformer):
+    """Override certain sox.Transformer methods
+    """
     def fir(self, coefficients):
         """Use SoX’s FFT convolution engine with given FIR filter coefficients.
+        The SonusAI override allows coefficients to be either a list of numbers
+        or a string containing a text file with the coefficients.
         Parameters
         ----------
         coefficients : list or str
@@ -225,6 +230,69 @@ class AawareSoxTransformer(sox.Transformer):
         return self
+    def tempo(self, factor, audio_type=None, quick=False):
+        """Time stretch audio without changing pitch.
+        This effect uses the WSOLA algorithm. The audio is chopped up into
+        segments which are then shifted in the time domain and overlapped
+        (cross-faded) at points where their waveforms are most similar as
+        determined by measurement of least squares.
+        The SonusAI override does not generate a warning for small factors.
+        The sox.Transformer's implementation of stretch does not invert
+        the factor even though it says that it does; this invalidates the
+        factor size check and produces the wrong result.
+        Parameters
+        ----------
+        factor : float
+            The ratio of new tempo to the old tempo.
+            For ex. 1.1 speeds up the tempo by 10%; 0.9 slows it down by 10%.
+        audio_type : str
+            Type of audio, which optimizes algorithm parameters. One of:
+             * m : Music,
+             * s : Speech,
+             * l : Linear (useful when factor is close to 1),
+        quick : bool, default=False
+            If True, this effect will run faster but with lower sound quality.
+        See Also
+        --------
+        stretch, speed, pitch
+        """
+        from sox.core import is_number
+        from sonusai import SonusAIError
+        from sonusai import logger
+        if not is_number(factor) or factor <= 0:
+            raise SonusAIError('factor must be a positive number')
+        if factor < 0.5 or factor > 2:
+            logger.warning('Using an extreme time stretching factor. Quality of results will be poor')
+        if audio_type not in [None, 'm', 's', 'l']:
+            raise SonusAIError("audio_type must be one of None, 'm', 's', or 'l'.")
+        if not isinstance(quick, bool):
+            raise SonusAIError('quick must be a boolean')
+        effect_args = ['tempo']
+        if quick:
+            effect_args.append('-q')
+        if audio_type is not None:
+            effect_args.append('-{}'.format(audio_type))
+        effect_args.append('{:f}'.format(factor))
+        self.effects.extend(effect_args)
+        self.effects_log.append('tempo')
+        return self
 def validate_input_file(input_filepath: str) -> None:
     from os.path import exists
@@ -239,4 +307,4 @@ def validate_input_file(input_filepath: str) -> None:
     ext = splitext(input_filepath)[1][1:].lower()
     if ext not in VALID_FORMATS:
-        raise SonusAIError(f'This installation of Sox cannot process .{ext} files')
+        raise SonusAIError(f'This installation of SoX cannot process .{ext} files')

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/augmentation.py RENAMED Viewed

@@ -173,17 +173,17 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
     :param length_common_denominator: Pad resulting audio to be a multiple of this
     :return: Augmented audio
     """
-    import sox
     from sonusai import SonusAIError
     from sonusai.mixture import BIT_DEPTH
     from sonusai.mixture import CHANNEL_COUNT
     from sonusai.mixture import ENCODING
     from sonusai.mixture import SAMPLE_RATE
+    from sonusai.mixture import Transformer
     try:
         # Apply augmentations
-        tfm = sox.Transformer()
+        tfm = Transformer()
         tfm.set_input_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
         tfm.set_output_format(rate=SAMPLE_RATE, bits=BIT_DEPTH, channels=CHANNEL_COUNT, encoding=ENCODING)
@@ -201,11 +201,7 @@ def apply_augmentation(audio: AudioT, augmentation: Augmentation, length_common_
             tfm.pitch(n_semitones=float(augmentation.pitch) / 100)
         if augmentation.tempo is not None:
-            factor = float(augmentation.tempo)
-            if abs(factor - 1.0) <= 0.1:
-                tfm.stretch(factor=factor)
-            else:
-                tfm.tempo(factor=factor, audio_type='s')
+            tfm.tempo(factor=float(augmentation.tempo), audio_type='s')
         if augmentation.eq1 is not None:
             tfm.equalizer(frequency=augmentation.eq1[0], width_q=augmentation.eq1[1],
@@ -240,11 +236,10 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     :return: Augmented audio
     """
     import numpy as np
-    import sox
     from sonusai import SonusAIError
-    from sonusai.mixture import AawareSoxTransformer
     from sonusai.mixture import SAMPLE_RATE
+    from sonusai.mixture import Transformer
     from sonusai.utils import linear_to_db
     max_abs_audio = max(abs(audio))
@@ -257,7 +252,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     max_db = linear_to_db(max_abs_audio)
     # Convert audio to IR sample rate and normalize to -20 dBFS to avoid clipping when applying IR
-    tfm = sox.Transformer()
+    tfm = Transformer()
     tfm.set_output_format(rate=ir.sample_rate)
     tfm.norm(db_level=-20)
     audio_out = tfm.build_array(input_array=audio, sample_rate_in=SAMPLE_RATE)
@@ -267,7 +262,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     audio_out = np.pad(array=audio_out, pad_width=(pad, pad))
     # Apply IR and convert back to global sample rate
-    tfm = AawareSoxTransformer()
+    tfm = Transformer()
     tfm.set_output_format(rate=SAMPLE_RATE)
     tfm.fir(coefficients=ir.coefficients_file)
     try:
@@ -276,7 +271,7 @@ def apply_ir(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
         raise SonusAIError(f'Error applying IR: {e}')
     # Reset level to previous max value
-    tfm = sox.Transformer()
+    tfm = Transformer()
     tfm.norm(db_level=max_db)
     audio_out = tfm.build_array(input_array=audio_out, sample_rate_in=SAMPLE_RATE)
@@ -296,8 +291,10 @@ def estimate_augmented_length_from_length(length: int,
     :param length_common_denominator: Pad resulting audio to be a multiple of this
     :return: Estimated length of augmented audio
     """
+    import numpy as np
     if augmentation.tempo is not None:
-        length = int(length // float(augmentation.tempo))
+        length = int(np.round(length / float(augmentation.tempo)))
     length += get_pad_length(length, length_common_denominator)

{sonusai-0.12.5 → sonusai-0.12.7}/sonusai/mixture/config.py RENAMED Viewed

@@ -250,7 +250,7 @@ def get_target_files(config: dict, show_progress: bool = False) -> TargetFiles:
                                              for target in config['targets']]))
     progress = tqdm(total=len(target_files), disable=not show_progress)
-    target_files = p_tqdm_map(_get_samples, target_files, progress=progress)
+    target_files = p_tqdm_map(_get_samples, target_files, progress=progress, chunksize=10)
     progress.close()
     max_class = get_max_class(config['num_classes'], config['truth_mode'] == 'mutex')
@@ -394,7 +394,7 @@ def get_noise_files(config: dict, show_progress: bool = False) -> NoiseFiles:
     noise_files = list(chain.from_iterable([_append_noise_files(noise_file=noise) for noise in config['noises']]))
     progress = tqdm(total=len(noise_files), disable=not show_progress)
-    noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress)
+    noise_files = p_tqdm_map(_get_samples, noise_files, progress=progress, chunksize=10)
     progress.close()
     return dataclass_from_dict(NoiseFiles, noise_files)

sonusai 0.12.5__tar.gz → 0.12.7__tar.gz

sonusai 0.12.5__tar.gz → 0.12.7__tar.gz