PyPI - sonusai - Versions diffs - 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl - Mend

sonusai 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

sonusai/__init__.py +0 -1
sonusai/audiofe.py +3 -3
sonusai/calc_metric_spenh.py +81 -52
sonusai/doc/doc.py +0 -24
sonusai/genmetrics.py +146 -0
sonusai/genmixdb.py +0 -2
sonusai/mixture/__init__.py +0 -1
sonusai/mixture/constants.py +0 -1
sonusai/mixture/datatypes.py +2 -9
sonusai/mixture/generation.py +136 -38
sonusai/mixture/helpers.py +58 -1
sonusai/mixture/mapped_snr_f.py +56 -9
sonusai/mixture/mixdb.py +293 -170
sonusai/mixture/sox_augmentation.py +3 -0
sonusai/mixture/tokenized_shell_vars.py +8 -1
sonusai/mkwav.py +4 -4
sonusai/onnx_predict.py +2 -2
sonusai/post_spenh_targetf.py +2 -2
sonusai/speech/textgrid.py +6 -24
sonusai/speech/{voxceleb2.py → voxceleb.py} +19 -3
sonusai/utils/__init__.py +1 -1
sonusai/utils/asr_functions/aaware_whisper.py +2 -2
sonusai/utils/{wave.py → write_audio.py} +2 -2
{sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/METADATA +4 -1
{sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/RECORD +27 -33
sonusai/mixture/speaker_metadata.py +0 -35
sonusai/mkmanifest.py +0 -209
sonusai/utils/asr_manifest_functions/__init__.py +0 -6
sonusai/utils/asr_manifest_functions/data.py +0 -1
sonusai/utils/asr_manifest_functions/librispeech.py +0 -46
sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -29
sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -66
{sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/WHEEL +0 -0
{sonusai-0.17.2.dist-info → sonusai-0.18.0.dist-info}/entry_points.txt +0 -0

sonusai/mixture/mixdb.py CHANGED Viewed

@@ -1,16 +1,12 @@
 from functools import cached_property
 from functools import lru_cache
 from functools import partial
-from pathlib import Path
 from sqlite3 import Connection
 from sqlite3 import Cursor
 from typing import Any
 from typing import Callable
 from typing import Optional
-from praatio import textgrid
-from praatio.utilities.constants import Interval
 from sonusai.mixture.datatypes import AudioF
 from sonusai.mixture.datatypes import AudioT
 from sonusai.mixture.datatypes import AudiosF
@@ -34,7 +30,6 @@ from sonusai.mixture.datatypes import TargetFiles
 from sonusai.mixture.datatypes import TransformConfig
 from sonusai.mixture.datatypes import Truth
 from sonusai.mixture.datatypes import UniversalSNR
-from sonusai.mixture.tokenized_shell_vars import tokenized_expand
 def db_file(location: str, test: bool = False) -> str:
@@ -88,14 +83,12 @@ class MixtureDatabase:
     def __init__(self, location: str, test: bool = False) -> None:
         self.location = location
         self.db = partial(SQLiteContextManager, self.location, test)
-        self._speaker_metadata_tiers: list[str] = []
     @cached_property
     def json(self) -> str:
         from .datatypes import MixtureDatabaseConfig
         config = MixtureDatabaseConfig(
-            asr_manifest=self.asr_manifests,
             class_balancing=self.class_balancing,
             class_labels=self.class_labels,
             class_weights_threshold=self.class_weights_thresholds,
@@ -121,86 +114,6 @@ class MixtureDatabase:
         with open(file=json_name, mode='w') as file:
             file.write(self.json)
-    def target_asr_data(self, t_id: int) -> str | None:
-        """Get the ASR data for the given target ID
-        :param t_id: Target ID
-        :return: ASR text or None
-        """
-        from .tokenized_shell_vars import tokenized_expand
-        name, _ = tokenized_expand(self.target_file(t_id).name)
-        return self.asr_manifest_data.get(name, None)
-    def mixture_asr_data(self, m_id: int) -> list[str | None]:
-        """Get the ASR data for the given mixid
-        :param m_id: Zero-based mixture ID
-        :return: List of ASR text or None
-        """
-        return [self.target_asr_data(target.file_id) for target in self.mixture(m_id).targets]
-    @cached_property
-    def asr_manifest_data(self) -> dict[str, str]:
-        """Get ASR data
-        Each line of a manifest file should be in the following format:
-        {"audio_filepath": "/path/to/audio.wav", "text": "the transcription of the utterance", "duration": 23.147}
-        The audio_filepath field should provide an absolute path to the audio file corresponding to the utterance. The
-        text field should contain the full transcript for the utterance, and the duration field should reflect the
-        duration of the utterance in seconds.
-        Each entry in the manifest (describing one audio file) should be bordered by '{' and '}' and must be contained
-        on one line. The fields that describe the file should be separated by commas, and have the form
-        "field_name": value, as shown above.
-        Since the manifest specifies the path for each utterance, the audio files do not have to be located in the same
-        directory as the manifest, or even in any specific directory structure.
-        The manifest dictionary consists of key/value pairs where the keys are target file names and the values are ASR
-        text.
-        """
-        import json
-        from sonusai import SonusAIError
-        from .tokenized_shell_vars import tokenized_expand
-        expected_keys = ['audio_filepath', 'text', 'duration']
-        def _error_preamble(e_name: str, e_line_num: int) -> str:
-            return f'Invalid entry in ASR manifest {e_name} line {e_line_num}'
-        asr_manifest_data: dict[str, str] = {}
-        for name in self.asr_manifests:
-            expanded_name, _ = tokenized_expand(name)
-            with open(file=expanded_name, mode='r') as f:
-                line_num = 1
-                for line in f:
-                    result = json.loads(line.strip())
-                    for key in expected_keys:
-                        if key not in result:
-                            SonusAIError(f'{_error_preamble(name, line_num)}: missing field "{key}"')
-                    for key in result.keys():
-                        if key not in expected_keys:
-                            SonusAIError(f'{_error_preamble(name, line_num)}: unknown field "{key}"')
-                    key, _ = tokenized_expand(result['audio_filepath'])
-                    value = result['text']
-                    if key in asr_manifest_data:
-                        SonusAIError(f'{_error_preamble(name, line_num)}: entry already exists')
-                    asr_manifest_data[key] = value
-                    line_num += 1
-        return asr_manifest_data
     @cached_property
     def fg_config(self) -> FeatureGeneratorConfig:
         return FeatureGeneratorConfig(feature_mode=self.feature,
@@ -293,14 +206,14 @@ class MixtureDatabase:
     def feature_step_samples(self) -> int:
         return self.ft_config.R * self.fg_decimation * self.fg_step
-    def total_samples(self, mixids: GeneralizedIDs = '*') -> int:
-        return sum([self.mixture(m_id).samples for m_id in self.mixids_to_list(mixids)])
+    def total_samples(self, m_ids: GeneralizedIDs = '*') -> int:
+        return sum([self.mixture(m_id).samples for m_id in self.mixids_to_list(m_ids)])
-    def total_transform_frames(self, mixids: GeneralizedIDs = '*') -> int:
-        return self.total_samples(mixids) // self.ft_config.R
+    def total_transform_frames(self, m_ids: GeneralizedIDs = '*') -> int:
+        return self.total_samples(m_ids) // self.ft_config.R
-    def total_feature_frames(self, mixids: GeneralizedIDs = '*') -> int:
-        return self.total_samples(mixids) // self.feature_step_samples
+    def total_feature_frames(self, m_ids: GeneralizedIDs = '*') -> int:
+        return self.total_samples(m_ids) // self.feature_step_samples
     def mixture_transform_frames(self, samples: int) -> int:
         return samples // self.ft_config.R
@@ -308,24 +221,15 @@ class MixtureDatabase:
     def mixture_feature_frames(self, samples: int) -> int:
         return samples // self.feature_step_samples
-    def mixids_to_list(self, mixids: Optional[GeneralizedIDs] = None) -> list[int]:
+    def mixids_to_list(self, m_ids: Optional[GeneralizedIDs] = None) -> list[int]:
         """Resolve generalized mixture IDs to a list of integers
-        :param mixids: Generalized mixture IDs
+        :param m_ids: Generalized mixture IDs
         :return: List of mixture ID integers
         """
         from .helpers import generic_ids_to_list
-        return generic_ids_to_list(self.num_mixtures, mixids)
-    @cached_property
-    def asr_manifests(self) -> list[str]:
-        """Get ASR manifests from db
-        :return: ASR manifests
-        """
-        with self.db() as c:
-            return [str(item[0]) for item in c.execute("SELECT asr_manifest.manifest FROM asr_manifest").fetchall()]
+        return generic_ids_to_list(self.num_mixtures, m_ids)
     @cached_property
     def class_labels(self) -> list[str]:
@@ -408,7 +312,8 @@ class MixtureDatabase:
         with self.db() as c:
             target_files: TargetFiles = []
-            for target in c.execute("SELECT target_file.name, samples, level_type, id FROM target_file").fetchall():
+            for target in c.execute(
+                    "SELECT target_file.name, samples, level_type, id, speaker_id FROM target_file").fetchall():
                 truth_settings: TruthSettings = []
                 for ts in c.execute(
                         "SELECT truth_setting.setting " +
@@ -423,7 +328,8 @@ class MixtureDatabase:
                 target_files.append(TargetFile(name=target[0],
                                                samples=target[1],
                                                level_type=target[2],
-                                               truth_settings=truth_settings))
+                                               truth_settings=truth_settings,
+                                               speaker_id=target[4]))
             return target_files
     @cached_property
@@ -720,7 +626,7 @@ class MixtureDatabase:
         :param m_id: Zero-based mixture ID
         :param targets: List of augmented target audio data (one per target in the mixup)
-        :param target: Augmented target audio for the given mixid
+        :param target: Augmented target audio for the given m_id
         :param force: Force computing data from original sources regardless of whether cached data exists
         :return: Augmented target transform data
         """
@@ -1078,97 +984,312 @@ class MixtureDatabase:
         return class_count
     @cached_property
-    def _speech_metadata(self) -> dict[str, dict[str, SpeechMetadata]]:
-        """Speech metadata is a nested dictionary.
+    def speaker_metadata_tiers(self) -> list[str]:
+        import json
-        data['target_file_name'] = { 'tier': SpeechMetadata, ... }
-        """
-        data: dict[str, dict[str, SpeechMetadata]] = {}
-        for file in self.target_files:
-            data[file.name] = {}
-            file_name, _ = tokenized_expand(file.name)
-            tg_file = Path(file_name).with_suffix('.TextGrid')
-            if tg_file.exists():
-                tg = textgrid.openTextgrid(str(tg_file), includeEmptyIntervals=False)
-                for tier in tg.tierNames:
-                    entries = tg.getTier(tier).entries
-                    if len(entries) > 1:
-                        data[file.name][tier] = entries
-                    else:
-                        data[file.name][tier] = entries[0].label
+        with self.db() as c:
+            return json.loads(c.execute("SELECT speaker_metadata_tiers FROM top WHERE 1 = id").fetchone()[0])
-        return data
+    @cached_property
+    def textgrid_metadata_tiers(self) -> list[str]:
+        import json
+        with self.db() as c:
+            return json.loads(c.execute("SELECT textgrid_metadata_tiers FROM top WHERE 1 = id").fetchone()[0])
     @cached_property
     def speech_metadata_tiers(self) -> list[str]:
-        return sorted(list(set([key for value in self._speech_metadata.values() for key in value.keys()])))
+        return sorted(set(self.speaker_metadata_tiers + self.textgrid_metadata_tiers))
-    def speech_metadata_all(self, tier: str) -> list[SpeechMetadata]:
-        results = sorted(
-            set([value.get(tier) for value in self._speech_metadata.values() if isinstance(value.get(tier), str)]))
-        return results
+    def speaker(self, speaker_id: int | None, tier: str) -> Optional[str]:
+        if speaker_id is None:
+            return None
+        with self.db() as c:
+            data = c.execute(f'SELECT {tier} FROM speaker WHERE ? = id', (speaker_id,)).fetchone()
+            if data is None:
+                return None
+            if data[0] is None:
+                return None
+            return data[0]
+    def speech_metadata(self, tier: str) -> list[str]:
+        from .helpers import get_textgrid_tier_from_target_file
+        results: set[str] = set()
+        if tier in self.textgrid_metadata_tiers:
+            for target_file in self.target_files:
+                data = get_textgrid_tier_from_target_file(target_file.name, tier)
+                if data is None:
+                    continue
+                if isinstance(data, list):
+                    for item in data:
+                        results.add(item.label)
+                else:
+                    results.add(data)
+        elif tier in self.speaker_metadata_tiers:
+            for target_file in self.target_files:
+                data = self.speaker(target_file.speaker_id, tier)
+                if data is not None:
+                    results.add(data)
+        return sorted(results)
+    def mixture_speech_metadata(self, mixid: int, tier: str) -> list[SpeechMetadata]:
+        from praatio.utilities.constants import Interval
+        from .helpers import get_textgrid_tier_from_target_file
+        results: list[SpeechMetadata] = []
+        is_textgrid = tier in self.textgrid_metadata_tiers
+        if is_textgrid:
+            for target in self.mixture(mixid).targets:
+                data = get_textgrid_tier_from_target_file(self.target_file(target.file_id).name, tier)
+                if data is not None:
+                    if isinstance(data, list):
+                        # Check for tempo augmentation and adjust Interval start and end data as needed
+                        entries = []
+                        for entry in data:
+                            if target.augmentation.tempo is not None:
+                                entries.append(Interval(entry.start / target.augmentation.tempo,
+                                                        entry.end / target.augmentation.tempo,
+                                                        entry.label))
+                            else:
+                                entries.append(entry)
+                        results.append(entries)
+                    else:
+                        results.append(data)
+        else:
+            for target in self.mixture(mixid).targets:
+                data = self.speaker(self.target_file(target.file_id).speaker_id, tier)
+                if data is not None:
+                    results.append(data)
+        return sorted(results)
     def mixids_for_speech_metadata(self,
                                    tier: str,
-                                   value: str,
+                                   value: str | None,
                                    predicate: Callable[[str], bool] = None) -> list[int]:
-        """Get a list of mixids for the given speech metadata tier.
+        """Get a list of mixture IDs for the given speech metadata tier.
-        If 'predicate' is None, then include mixids whose tier values are equal to the given 'value'. If 'predicate' is
-        not None, then ignore 'value' and use the given callable to determine which entries to include.
+        If 'predicate' is None, then include mixture IDs whose tier values are equal to the given 'value'.
+        If 'predicate' is not None, then ignore 'value' and use the given callable to determine which entries
+        to include.
         Examples:
+        >>> mixdb = MixtureDatabase('/mixdb_location')
         >>> mixids = mixdb.mixids_for_speech_metadata('speaker_id', 'TIMIT_ARC0')
-        Get mixids for mixtures with speakers whose speaker_ids are 'TIMIT_ARC0'.
+        Get mixture IDs for mixtures with speakers whose speaker_ids are 'TIMIT_ARC0'.
         >>> mixids = mixdb.mixids_for_speech_metadata('age', '', lambda x: int(x) < 25)
-        Get mixids for mixtures with speakers whose ages are less than 25.
+        Get mixture IDs for mixtures with speakers whose ages are less than 25.
         >>> mixids = mixdb.mixids_for_speech_metadata('dialect', '', lambda x: x in ['New York City', 'Northern'])
-        Get mixids for mixtures with speakers whose dialects are either 'New York City' or 'Northern'.
+        Get mixture IDs for mixtures with speakers whose dialects are either 'New York City' or 'Northern'.
         """
+        from .helpers import get_textgrid_tier_from_target_file
         if predicate is None:
-            def predicate(x: str) -> bool:
+            def predicate(x: str | None) -> bool:
                 return x == value
         # First get list of matching target files
-        target_files = [k for k, v in self._speech_metadata.items() if
-                        isinstance(v.get(tier), str) and predicate(str(v.get(tier)))]
+        target_files: list[str] = []
+        is_textgrid = tier in self.textgrid_metadata_tiers
+        for target_file in self.target_files:
+            if is_textgrid:
+                metadata = get_textgrid_tier_from_target_file(target_file.name, tier)
+            else:
+                metadata = self.speaker(target_file.speaker_id, tier)
-        # Next get list of mixids that contain those target files
-        mixids: list[int] = []
-        for mixid in self.mixids_to_list():
-            mixid_target_files = [self.target_file(target.file_id).name for target in self.mixture(mixid).targets]
+            if not isinstance(metadata, list) and predicate(metadata):
+                target_files.append(target_file.name)
+        # Next get list of mixture IDs that contain those target files
+        m_ids: list[int] = []
+        for m_id in self.mixids_to_list():
+            mixid_target_files = [self.target_file(target.file_id).name for target in self.mixture(m_id).targets]
             for mixid_target_file in mixid_target_files:
                 if mixid_target_file in target_files:
-                    mixids.append(mixid)
+                    m_ids.append(m_id)
-        # Return sorted, unique list of mixids
-        return sorted(list(set(mixids)))
+        # Return sorted, unique list of mixture IDs
+        return sorted(list(set(m_ids)))
-    def get_speech_metadata(self, mixid: int, tier: str) -> list[SpeechMetadata]:
-        results: list[SpeechMetadata] = []
-        for target in self.mixture(mixid).targets:
-            data = self._speech_metadata[self.target_file(target.file_id).name].get(tier)
+    def mixture_all_speech_metadata(self, m_id: int) -> list[dict[str, SpeechMetadata]]:
+        from .helpers import mixture_all_speech_metadata
-            if data is None:
-                results.append(None)
-            elif isinstance(data, list):
-                # Check for tempo augmentation and adjust Interval start and end data as needed
-                entries = []
-                for entry in data:
-                    if target.augmentation.tempo is not None:
-                        entries.append(Interval(entry.start / target.augmentation.tempo,
-                                                entry.end / target.augmentation.tempo,
-                                                entry.label))
-                    else:
-                        entries.append(entry)
+        return mixture_all_speech_metadata(self, self.mixture(m_id))
-            else:
-                results.append(data)
+    def mixture_metric(self, m_id: int, metric: str, force: bool = False) -> Any:
+        """Get metric data for the given mixture ID
+        :param m_id: Zero-based mixture ID
+        :param metric: Metric data to retrieve
+        :param force: Force computing data from original sources regardless of whether cached data exists
+        :return: Metric data
+        """
+        from sonusai import SonusAIError
+        supported_metrics = (
+            'MXSNR',
+            'MXSSNRAVG',
+            'MXSSNRSTD',
+            'MXSSNRDAVG',
+            'MXSSNRDSTD',
+            'MXPESQ',
+            'MXWSDR',
+            'MXPD',
+            'MXSTOI',
+            'MXCSIG',
+            'MXCBAK',
+            'MXCOVL',
+            'TDCO',
+            'TMIN',
+            'TMAX',
+            'TPKDB',
+            'TLRMS',
+            'TPKR',
+            'TTR',
+            'TCR',
+            'TFL',
+            'TPKC',
+            'NDCO',
+            'NMIN',
+            'NMAX',
+            'NPKDB',
+            'NLRMS',
+            'NPKR',
+            'NTR',
+            'NCR',
+            'NFL',
+            'NPKC',
+            'SEDAVG',
+            'SEDCNT',
+            'SEDTOPN',
+        )
+        if not (metric in supported_metrics or metric.startswith('MXWER')):
+            raise ValueError(f'Unsupported metric: {metric}')
+        if not force:
+            result = self.read_mixture_data(m_id, metric)
+            if result is not None:
+                return result
+        mixture = self.mixture(m_id)
+        if mixture is None:
+            raise SonusAIError(f'Could not find mixture for m_id: {m_id}')
+        if metric.startswith('MXWER'):
+            return None
+        if metric == 'MXSNR':
+            return self.snrs
+        if metric == 'MXSSNRAVG':
+            return None
+        if metric == 'MXSSNRSTD':
+            return None
+        if metric == 'MXSSNRDAVG':
+            return None
+        if metric == 'MXSSNRDSTD':
+            return None
+        if metric == 'MXPESQ':
+            return None
+        if metric == 'MXWSDR':
+            return None
+        if metric == 'MXPD':
+            return None
+        if metric == 'MXSTOI':
+            return None
+        if metric == 'MXCSIG':
+            return None
+        if metric == 'MXCBAK':
+            return None
+        if metric == 'MXCOVL':
+            return None
+        if metric == 'TDCO':
+            return None
+        if metric == 'TMIN':
+            return None
+        if metric == 'TMAX':
+            return None
+        if metric == 'TPKDB':
+            return None
+        if metric == 'TLRMS':
+            return None
+        if metric == 'TPKR':
+            return None
+        if metric == 'TTR':
+            return None
+        if metric == 'TCR':
+            return None
+        if metric == 'TFL':
+            return None
+        if metric == 'TPKC':
+            return None
+        if metric == 'NDCO':
+            return None
+        if metric == 'NMIN':
+            return None
+        if metric == 'NMAX':
+            return None
+        if metric == 'NPKDB':
+            return None
+        if metric == 'NLRMS':
+            return None
+        if metric == 'NPKR':
+            return None
+        if metric == 'NTR':
+            return None
+        if metric == 'NCR':
+            return None
+        if metric == 'NFL':
+            return None
+        if metric == 'NPKC':
+            return None
+        if metric == 'SEDAVG':
+            return None
+        if metric == 'SEDCNT':
+            return None
-        return results
+        if metric == 'SEDTOPN':
+            return None
 @lru_cache
@@ -1206,8 +1327,9 @@ def _target_file(db: partial, t_id: int) -> TargetFile:
     from .datatypes import TruthSettings
     with db() as c:
-        target = c.execute("SELECT target_file.name, samples, level_type FROM target_file WHERE ? = target_file.id",
-                           (t_id,)).fetchone()
+        target = c.execute(
+            "SELECT target_file.name, samples, level_type, speaker_id FROM target_file WHERE ? = target_file.id",
+            (t_id,)).fetchone()
         truth_settings: TruthSettings = []
         for ts in c.execute(
@@ -1223,7 +1345,8 @@ def _target_file(db: partial, t_id: int) -> TargetFile:
         return TargetFile(name=target[0],
                           samples=target[1],
                           level_type=target[2],
-                          truth_settings=truth_settings)
+                          truth_settings=truth_settings,
+                          speaker_id=target[3])
 @lru_cache

sonusai/mixture/sox_augmentation.py CHANGED Viewed

@@ -84,6 +84,7 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     :return: Augmented audio
     """
     import math
+    from pathlib import Path
     import tempfile
     import numpy as np
@@ -124,7 +125,9 @@ def apply_impulse_response(audio: AudioT, ir: ImpulseResponseData) -> AudioT:
     except Exception as e:
         raise SonusAIError(f'Error applying IR: {e}')
+    path = Path(temp.name)
     temp.close()
+    path.unlink()
     # Reset level to previous max value
     tfm = Transformer()

sonusai/mixture/tokenized_shell_vars.py CHANGED Viewed

@@ -1,4 +1,7 @@
-def tokenized_expand(name: str | bytes) -> tuple[str, dict[str, str]]:
+from pathlib import Path
+def tokenized_expand(name: str | bytes | Path) -> tuple[str, dict[str, str]]:
     """Expand shell variables of the forms $var, ${var} and %var%.
     Unknown variables are left unchanged.
@@ -25,6 +28,9 @@ def tokenized_expand(name: str | bytes) -> tuple[str, dict[str, str]]:
     if isinstance(name, bytes):
         name = name.decode('utf-8')
+    if isinstance(name, Path):
+        name = name.as_posix()
     name = os.fspath(name)
     token_map: dict = {}
@@ -121,6 +127,7 @@ def tokenized_expand(name: str | bytes) -> tuple[str, dict[str, str]]:
         else:
             result += c
         index += 1
     return result, token_map

sonusai/mkwav.py CHANGED Viewed

@@ -72,7 +72,7 @@ def _process_mixture(mixid: int) -> None:
     from sonusai.mixture import mixture_metadata
     from sonusai.utils import float_to_int16
-    from sonusai.utils import write_wav
+    from sonusai.utils import write_audio
     mixture_filename = join(MP_GLOBAL.mixdb.location, MP_GLOBAL.mixdb.mixtures[mixid].name)
     mixture_basename = splitext(mixture_filename)[0]
@@ -100,11 +100,11 @@ def _process_mixture(mixid: int) -> None:
             if MP_GLOBAL.write_noise:
                 noise = np.array(f['noise'])
-    write_wav(name=mixture_basename + '_mixture.wav', audio=float_to_int16(mixture))
+    write_audio(name=mixture_basename + '_mixture.wav', audio=float_to_int16(mixture))
     if MP_GLOBAL.write_target:
-        write_wav(name=mixture_basename + '_target.wav', audio=float_to_int16(target))
+        write_audio(name=mixture_basename + '_target.wav', audio=float_to_int16(target))
     if MP_GLOBAL.write_noise:
-        write_wav(name=mixture_basename + '_noise.wav', audio=float_to_int16(noise))
+        write_audio(name=mixture_basename + '_noise.wav', audio=float_to_int16(noise))
     with open(file=mixture_basename + '.txt', mode='w') as f:
         f.write(mixture_metadata(MP_GLOBAL.mixdb, MP_GLOBAL.mixdb.mixture(mixid)))

sonusai/onnx_predict.py CHANGED Viewed

@@ -100,7 +100,7 @@ def main() -> None:
     from sonusai.utils import create_ts_name
     from sonusai.utils import load_ort_session
     from sonusai.utils import reshape_inputs
-    from sonusai.utils import write_wav
+    from sonusai.utils import write_audio
     mixdb_path = None
     mixdb = None
@@ -201,7 +201,7 @@ def main() -> None:
                 predict = np.transpose(predict, [1, 0, 2])
                 predict_audio = get_audio_from_feature(feature=predict, feature_mode=feature_mode)
                 owav_name = splitext(output_fname)[0] + '_predict.wav'
-                write_wav(owav_name, predict_audio)
+                write_audio(owav_name, predict_audio)
 if __name__ == '__main__':

sonusai 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl

sonusai 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl