PyPI - sonusai - Versions diffs - 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl - Mend

sonusai 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

sonusai/audiofe.py +22 -51
sonusai/calc_metric_spenh.py +206 -213
sonusai/doc/doc.py +1 -1
sonusai/mixture/__init__.py +2 -0
sonusai/mixture/audio.py +12 -0
sonusai/mixture/datatypes.py +11 -3
sonusai/mixture/mixdb.py +101 -0
sonusai/mixture/soundfile_audio.py +39 -0
sonusai/mixture/speaker_metadata.py +35 -0
sonusai/mixture/torchaudio_audio.py +22 -0
sonusai/mkmanifest.py +1 -1
sonusai/onnx_predict.py +114 -410
sonusai/queries/queries.py +1 -1
sonusai/speech/__init__.py +3 -0
sonusai/speech/l2arctic.py +116 -0
sonusai/speech/librispeech.py +99 -0
sonusai/speech/mcgill.py +70 -0
sonusai/speech/textgrid.py +100 -0
sonusai/speech/timit.py +135 -0
sonusai/speech/types.py +12 -0
sonusai/speech/vctk.py +52 -0
sonusai/speech/voxceleb2.py +86 -0
sonusai/utils/__init__.py +2 -1
sonusai/utils/asr_manifest_functions/__init__.py +0 -1
sonusai/utils/asr_manifest_functions/data.py +0 -8
sonusai/utils/asr_manifest_functions/librispeech.py +1 -1
sonusai/utils/asr_manifest_functions/mcgill_speech.py +1 -1
sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +1 -1
sonusai/utils/braced_glob.py +7 -3
sonusai/utils/onnx_utils.py +110 -106
sonusai/utils/path_info.py +7 -0
{sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/METADATA +2 -1
{sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/RECORD +35 -30
{sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/WHEEL +1 -1
sonusai/calc_metric_spenh-save.py +0 -1334
sonusai/onnx_predict-old.py +0 -240
sonusai/onnx_predict-save.py +0 -487
sonusai/ovino_predict.py +0 -508
sonusai/ovino_query_devices.py +0 -47
sonusai/torchl_onnx-old.py +0 -216
{sonusai-0.17.0.dist-info → sonusai-0.17.2.dist-info}/entry_points.txt +0 -0

sonusai/doc/doc.py CHANGED Viewed

@@ -40,7 +40,7 @@ Required field:
  'name'
                 File name. May be one of the following:
-  audio         Supported formats are .wav, .mp3, .aif, .flac, and .ogg
+  audio         Supported formats are .wav, .mp3, .m4a, .aif, .flac, and .ogg
   glob          Matches file glob patterns
   .yml          The given YAML file is parsed into the list
   .txt          Each line in the given text file indicates an item which

sonusai/mixture/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from .audio import get_duration
 from .audio import get_next_noise
 from .audio import get_num_samples
+from .audio import get_sample_rate
 from .audio import read_audio
 from .audio import read_ir
 from .audio import validate_input_file
@@ -73,6 +74,7 @@ from .datatypes import Predict
 from .datatypes import Segsnr
 from .datatypes import SpectralMask
 from .datatypes import SpectralMasks
+from .datatypes import SpeechMetadata
 from .datatypes import TargetFile
 from .datatypes import TargetFiles
 from .datatypes import TransformConfig

sonusai/mixture/audio.py CHANGED Viewed

@@ -45,6 +45,18 @@ def validate_input_file(input_filepath: str) -> None:
         raise SonusAIError(f'This installation cannot process .{ext} files')
+@lru_cache
+def get_sample_rate(name: str) -> int:
+    """Get sample rate from audio file
+    :param name: File name
+    :return: Sample rate
+    """
+    from .soundfile_audio import get_sample_rate
+    return get_sample_rate(name)
 @lru_cache
 def read_audio(name: str) -> AudioT:
     """Read audio data from a file

sonusai/mixture/datatypes.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import TypeAlias
 import numpy as np
 import numpy.typing as npt
 from dataclasses_json import DataClassJsonMixin
+from praatio.utilities.constants import Interval
 AudioT: TypeAlias = npt.NDArray[np.float32]
 AudiosT: TypeAlias = list[AudioT]
@@ -249,7 +250,7 @@ class Target(DataClassSonusAIMixin):
     gain: Optional[float] = None
-Targets = list[Target]
+Targets: TypeAlias = list[Target]
 @dataclass
@@ -276,11 +277,15 @@ class Mixture(DataClassSonusAIMixin):
         return self.noise.file_id
     @property
-    def target_id(self) -> list[int]:
+    def target_ids(self) -> list[int]:
         return [target.file_id for target in self.targets]
+    @property
+    def target_augmentations(self) -> list[Augmentation]:
+        return [target.augmentation for target in self.targets]
-Mixtures = list[Mixture]
+Mixtures: TypeAlias = list[Mixture]
 @dataclass(frozen=True)
@@ -326,3 +331,6 @@ class MixtureDatabaseConfig(DataClassSonusAIMixin):
     target_files: Optional[TargetFiles] = None
     truth_mutex: Optional[bool] = None
     truth_reduction_function: Optional[str] = None
+SpeechMetadata: TypeAlias = str | list[Interval] | None

sonusai/mixture/mixdb.py CHANGED Viewed

@@ -1,11 +1,16 @@
 from functools import cached_property
 from functools import lru_cache
 from functools import partial
+from pathlib import Path
 from sqlite3 import Connection
 from sqlite3 import Cursor
 from typing import Any
+from typing import Callable
 from typing import Optional
+from praatio import textgrid
+from praatio.utilities.constants import Interval
 from sonusai.mixture.datatypes import AudioF
 from sonusai.mixture.datatypes import AudioT
 from sonusai.mixture.datatypes import AudiosF
@@ -23,11 +28,13 @@ from sonusai.mixture.datatypes import NoiseFiles
 from sonusai.mixture.datatypes import Segsnr
 from sonusai.mixture.datatypes import SpectralMask
 from sonusai.mixture.datatypes import SpectralMasks
+from sonusai.mixture.datatypes import SpeechMetadata
 from sonusai.mixture.datatypes import TargetFile
 from sonusai.mixture.datatypes import TargetFiles
 from sonusai.mixture.datatypes import TransformConfig
 from sonusai.mixture.datatypes import Truth
 from sonusai.mixture.datatypes import UniversalSNR
+from sonusai.mixture.tokenized_shell_vars import tokenized_expand
 def db_file(location: str, test: bool = False) -> str:
@@ -81,6 +88,7 @@ class MixtureDatabase:
     def __init__(self, location: str, test: bool = False) -> None:
         self.location = location
         self.db = partial(SQLiteContextManager, self.location, test)
+        self._speaker_metadata_tiers: list[str] = []
     @cached_property
     def json(self) -> str:
@@ -1069,6 +1077,99 @@ class MixtureDatabase:
         return class_count
+    @cached_property
+    def _speech_metadata(self) -> dict[str, dict[str, SpeechMetadata]]:
+        """Speech metadata is a nested dictionary.
+        data['target_file_name'] = { 'tier': SpeechMetadata, ... }
+        """
+        data: dict[str, dict[str, SpeechMetadata]] = {}
+        for file in self.target_files:
+            data[file.name] = {}
+            file_name, _ = tokenized_expand(file.name)
+            tg_file = Path(file_name).with_suffix('.TextGrid')
+            if tg_file.exists():
+                tg = textgrid.openTextgrid(str(tg_file), includeEmptyIntervals=False)
+                for tier in tg.tierNames:
+                    entries = tg.getTier(tier).entries
+                    if len(entries) > 1:
+                        data[file.name][tier] = entries
+                    else:
+                        data[file.name][tier] = entries[0].label
+        return data
+    @cached_property
+    def speech_metadata_tiers(self) -> list[str]:
+        return sorted(list(set([key for value in self._speech_metadata.values() for key in value.keys()])))
+    def speech_metadata_all(self, tier: str) -> list[SpeechMetadata]:
+        results = sorted(
+            set([value.get(tier) for value in self._speech_metadata.values() if isinstance(value.get(tier), str)]))
+        return results
+    def mixids_for_speech_metadata(self,
+                                   tier: str,
+                                   value: str,
+                                   predicate: Callable[[str], bool] = None) -> list[int]:
+        """Get a list of mixids for the given speech metadata tier.
+        If 'predicate' is None, then include mixids whose tier values are equal to the given 'value'. If 'predicate' is
+        not None, then ignore 'value' and use the given callable to determine which entries to include.
+        Examples:
+        >>> mixids = mixdb.mixids_for_speech_metadata('speaker_id', 'TIMIT_ARC0')
+        Get mixids for mixtures with speakers whose speaker_ids are 'TIMIT_ARC0'.
+        >>> mixids = mixdb.mixids_for_speech_metadata('age', '', lambda x: int(x) < 25)
+        Get mixids for mixtures with speakers whose ages are less than 25.
+        >>> mixids = mixdb.mixids_for_speech_metadata('dialect', '', lambda x: x in ['New York City', 'Northern'])
+        Get mixids for mixtures with speakers whose dialects are either 'New York City' or 'Northern'.
+        """
+        if predicate is None:
+            def predicate(x: str) -> bool:
+                return x == value
+        # First get list of matching target files
+        target_files = [k for k, v in self._speech_metadata.items() if
+                        isinstance(v.get(tier), str) and predicate(str(v.get(tier)))]
+        # Next get list of mixids that contain those target files
+        mixids: list[int] = []
+        for mixid in self.mixids_to_list():
+            mixid_target_files = [self.target_file(target.file_id).name for target in self.mixture(mixid).targets]
+            for mixid_target_file in mixid_target_files:
+                if mixid_target_file in target_files:
+                    mixids.append(mixid)
+        # Return sorted, unique list of mixids
+        return sorted(list(set(mixids)))
+    def get_speech_metadata(self, mixid: int, tier: str) -> list[SpeechMetadata]:
+        results: list[SpeechMetadata] = []
+        for target in self.mixture(mixid).targets:
+            data = self._speech_metadata[self.target_file(target.file_id).name].get(tier)
+            if data is None:
+                results.append(None)
+            elif isinstance(data, list):
+                # Check for tempo augmentation and adjust Interval start and end data as needed
+                entries = []
+                for entry in data:
+                    if target.augmentation.tempo is not None:
+                        entries.append(Interval(entry.start / target.augmentation.tempo,
+                                                entry.end / target.augmentation.tempo,
+                                                entry.label))
+                    else:
+                        entries.append(entry)
+            else:
+                results.append(data)
+        return results
 @lru_cache
 def _spectral_mask(db: partial, sm_id: int) -> SpectralMask:

sonusai/mixture/soundfile_audio.py CHANGED Viewed

@@ -18,6 +18,11 @@ def _raw_read(name: str) -> tuple[AudioT, int]:
             raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
             raw = raw / 2 ** (sound.sample_width * 8 - 1)
             sample_rate = sound.frame_rate
+        elif expanded_name.endswith('.m4a'):
+            sound = AudioSegment.from_file(expanded_name)
+            raw = np.array(sound.get_array_of_samples()).astype(np.float32).reshape((-1, sound.channels))
+            raw = raw / 2 ** (sound.sample_width * 8 - 1)
+            sample_rate = sound.frame_rate
         else:
             raw, sample_rate = soundfile.read(expanded_name, always_2d=True, dtype='float32')
     except Exception as e:
@@ -29,6 +34,35 @@ def _raw_read(name: str) -> tuple[AudioT, int]:
     return np.squeeze(raw[:, 0]), sample_rate
+def get_sample_rate(name: str) -> int:
+    """Get sample rate from audio file using soundfile
+    :param name: File name
+    :return: Sample rate
+    """
+    import soundfile
+    from pydub import AudioSegment
+    from sonusai import SonusAIError
+    from .tokenized_shell_vars import tokenized_expand
+    expanded_name, _ = tokenized_expand(name)
+    try:
+        if expanded_name.endswith('.mp3'):
+            return AudioSegment.from_mp3(expanded_name).frame_rate
+        if expanded_name.endswith('.m4a'):
+            return AudioSegment.from_file(expanded_name).frame_rate
+        return soundfile.info(expanded_name).samplerate
+    except Exception as e:
+        if name != expanded_name:
+            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}): {e}')
+        else:
+            raise SonusAIError(f'Error reading {name}: {e}')
 def read_ir(name: str) -> ImpulseResponseData:
     """Read impulse response data using soundfile
@@ -87,8 +121,13 @@ def get_num_samples(name: str) -> int:
         sound = AudioSegment.from_mp3(expanded_name)
         samples = sound.frame_count()
         sample_rate = sound.frame_rate
+    elif expanded_name.endswith('.m4a'):
+        sound = AudioSegment.from_file(expanded_name)
+        samples = sound.frame_count()
+        sample_rate = sound.frame_rate
     else:
         info = soundfile.info(name)
         samples = info.frames
         sample_rate = info.samplerate
     return math.ceil(SAMPLE_RATE * samples / sample_rate)

sonusai/mixture/speaker_metadata.py ADDED Viewed

@@ -0,0 +1,35 @@
+from functools import cached_property
+from pathlib import Path
+from praatio import textgrid
+from praatio.data_classes.textgrid_tier import TextgridTier
+from praatio.utilities.constants import Interval
+from sonusai.mixture.datatypes import TargetFiles
+from sonusai.mixture.tokenized_shell_vars import tokenized_expand
+class SpeakerMetadata:
+    def __init__(self, target_files: TargetFiles) -> None:
+        self.data: dict[str, dict[str, TextgridTier]] = {}
+        for file in target_files:
+            self.data[file.name] = {}
+            file_name, _ = tokenized_expand(file.name)
+            tg_file = Path(file_name).with_suffix('.TextGrid')
+            if tg_file.exists():
+                tg = textgrid.openTextgrid(str(tg_file), includeEmptyIntervals=False)
+                for tier in tg.tierNames:
+                    self.data[file.name][tier] = tg.getTier(tier)
+    @cached_property
+    def tiers(self) -> list[str]:
+        return sorted(list(set([key for value in self.data.values() for key in value.keys()])))
+    def all(self, tier: str, label_only: bool = False) -> list[Interval]:
+        results = [value[tier].entries for value in self.data.values()]
+        if label_only:
+            return sorted(set([r.label for result in results for r in result]))
+        return results
+    def mixids_for(self, tier: str, value: str) -> list[int]:
+        pass

sonusai/mixture/torchaudio_audio.py CHANGED Viewed

@@ -39,6 +39,28 @@ def read_impulse_response(name: str) -> ImpulseResponseData:
     return ImpulseResponseData(name=name, sample_rate=sample_rate, data=data)
+def get_sample_rate(name: str) -> int:
+    """Get sample rate from audio file using torchaudio
+    :param name: File name
+    :return: Sample rate
+    """
+    import torchaudio
+    from sonusai import SonusAIError
+    from .tokenized_shell_vars import tokenized_expand
+    expanded_name, _ = tokenized_expand(name)
+    try:
+        return torchaudio.info(expanded_name).sample_rate
+    except Exception as e:
+        if name != expanded_name:
+            raise SonusAIError(f'Error reading {name} (expanded: {expanded_name}):\n{e}')
+        else:
+            raise SonusAIError(f'Error reading {name}:\n{e}')
 def read_audio(name: str) -> AudioT:
     """Read audio data from a file using torchaudio

sonusai/mkmanifest.py CHANGED Viewed

@@ -94,10 +94,10 @@ def main() -> None:
     from sonusai import initial_log_messages
     from sonusai import logger
     from sonusai import update_console_handler
+    from sonusai.utils import PathInfo
     from sonusai.utils import braced_iglob
     from sonusai.utils import pp_tqdm_imap
     from sonusai.utils import seconds_to_hms
-    from sonusai.utils.asr_manifest_functions import PathInfo
     from sonusai.utils.asr_manifest_functions import collect_librispeech_transcripts
     from sonusai.utils.asr_manifest_functions import collect_vctk_noisy_speech_transcripts
     from sonusai.utils.asr_manifest_functions import get_librispeech_manifest_entry

sonusai 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl

sonusai 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl