sonusai 0.17.3__py3-none-any.whl → 0.18.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonusai/__init__.py +0 -1
- sonusai/calc_metric_spenh.py +74 -45
- sonusai/doc/doc.py +0 -24
- sonusai/genmetrics.py +146 -0
- sonusai/genmixdb.py +0 -2
- sonusai/mixture/__init__.py +0 -1
- sonusai/mixture/constants.py +0 -1
- sonusai/mixture/datatypes.py +2 -9
- sonusai/mixture/db_datatypes.py +72 -0
- sonusai/mixture/generation.py +139 -38
- sonusai/mixture/helpers.py +75 -16
- sonusai/mixture/mapped_snr_f.py +56 -9
- sonusai/mixture/mixdb.py +347 -226
- sonusai/mixture/tokenized_shell_vars.py +8 -1
- sonusai/speech/textgrid.py +6 -24
- {sonusai-0.17.3.dist-info → sonusai-0.18.1.dist-info}/METADATA +3 -1
- {sonusai-0.17.3.dist-info → sonusai-0.18.1.dist-info}/RECORD +19 -24
- sonusai/mixture/speaker_metadata.py +0 -35
- sonusai/mkmanifest.py +0 -209
- sonusai/utils/asr_manifest_functions/__init__.py +0 -6
- sonusai/utils/asr_manifest_functions/data.py +0 -1
- sonusai/utils/asr_manifest_functions/librispeech.py +0 -46
- sonusai/utils/asr_manifest_functions/mcgill_speech.py +0 -29
- sonusai/utils/asr_manifest_functions/vctk_noisy_speech.py +0 -66
- {sonusai-0.17.3.dist-info → sonusai-0.18.1.dist-info}/WHEEL +0 -0
- {sonusai-0.17.3.dist-info → sonusai-0.18.1.dist-info}/entry_points.txt +0 -0
sonusai/mixture/generation.py
CHANGED
@@ -37,7 +37,15 @@ def initialize_db(location: str, test: bool = False) -> None:
         id INTEGER PRIMARY KEY NOT NULL,
         name TEXT NOT NULL,
         samples INTEGER NOT NULL,
-        level_type TEXT NOT NULL)
+        level_type TEXT NOT NULL,
+        speaker_id INTEGER,
+        FOREIGN KEY(speaker_id) REFERENCES speaker (id))
+        """)
+
+    con.execute("""
+        CREATE TABLE speaker (
+        id INTEGER PRIMARY KEY NOT NULL,
+        parent TEXT NOT NULL)
         """)
 
     con.execute("""
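For orientation, the schema fragment above can be exercised against a throwaway in-memory SQLite database. The CREATE TABLE bodies are lifted from the hunk; the table name target_file is taken from the INSERT statements later in this diff, and the sample row values are made up for illustration only.

    import sqlite3

    con = sqlite3.connect(':memory:')

    # New in 0.18.x: a speaker table plus a nullable speaker_id foreign key on target_file.
    con.execute("""
        CREATE TABLE speaker (
        id INTEGER PRIMARY KEY NOT NULL,
        parent TEXT NOT NULL)
        """)
    con.execute("""
        CREATE TABLE target_file (
        id INTEGER PRIMARY KEY NOT NULL,
        name TEXT NOT NULL,
        samples INTEGER NOT NULL,
        level_type TEXT NOT NULL,
        speaker_id INTEGER,
        FOREIGN KEY(speaker_id) REFERENCES speaker (id))
        """)

    # A target file whose directory has no speaker.yml simply keeps speaker_id as NULL.
    con.execute("INSERT INTO speaker (parent) VALUES (?)", ('speakers/s001',))
    con.execute("INSERT INTO target_file (name, samples, level_type, speaker_id) VALUES (?, ?, ?, ?)",
                ('speakers/s001/utt01.wav', 16000, 'default', 1))
    con.commit()
    con.close()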
@@ -58,13 +66,9 @@ def initialize_db(location: str, test: bool = False) -> None:
         seed INTEGER NOT NULL,
         truth_mutex BOOLEAN NOT NULL,
         truth_reduction_function TEXT NOT NULL,
-        mixid_width INTEGER NOT NULL)
-        """)
-
-    con.execute("""
-        CREATE TABLE asr_manifest (
-        id INTEGER PRIMARY KEY NOT NULL,
-        manifest TEXT NOT NULL)
+        mixid_width INTEGER NOT NULL,
+        speaker_metadata_tiers TEXT NOT NULL,
+        textgrid_metadata_tiers TEXT NOT NULL)
         """)
 
     con.execute("""
@@ -155,8 +159,8 @@ def populate_top_table(location: str, config: dict, test: bool = False) -> None:
     con = db_connection(location=location, readonly=False, test=test)
     con.execute("""
        INSERT INTO top (version, class_balancing, feature, noise_mix_mode, num_classes,
-                        seed, truth_mutex, truth_reduction_function, mixid_width)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                        seed, truth_mutex, truth_reduction_function, mixid_width, speaker_metadata_tiers, textgrid_metadata_tiers)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
     """, (
         1,
         config['class_balancing'],
@@ -166,19 +170,9 @@ def populate_top_table(location: str, config: dict, test: bool = False) -> None:
         config['seed'],
         truth_mutex,
         config['truth_reduction_function'],
-        0))
-
-
-
-
-def populate_asr_manifest_table(location: str, config: dict, test: bool = False) -> None:
-    """Populate asr_manifest table
-    """
-    from .mixdb import db_connection
-
-    con = db_connection(location=location, readonly=False, test=test)
-    con.executemany("INSERT INTO asr_manifest (manifest) VALUES (?)",
-                    [(item,) for item in config['asr_manifest']])
+        0,
+        '',
+        ''))
     con.commit()
     con.close()
 
@@ -242,36 +236,51 @@ def populate_spectral_mask_table(location: str, config: dict, test: bool = False
 def populate_target_file_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
     """Populate target file table
     """
+    import json
+    from pathlib import Path
+
     from .mixdb import db_connection
 
-
+    _populate_truth_setting_table(location, target_files, test)
+    _populate_speaker_table(location, target_files, test)
 
-
-    truth_settings: list[str] = []
-    for truth_setting in [truth_setting for target_file in target_files
-                          for truth_setting in target_file.truth_settings]:
-        ts = truth_setting.to_json()
-        if ts not in truth_settings:
-            truth_settings.append(ts)
-    con.executemany("INSERT INTO truth_setting (setting) VALUES (?)",
-                    [(item,) for item in truth_settings])
+    con = db_connection(location=location, readonly=False, test=test)
 
-    # Populate target_file table
     cur = con.cursor()
+    textgrid_metadata_tiers: set[str] = set()
     for target_file in target_files:
+        # Get TextGrid tiers for target file and add to collection
+        tiers = _get_textgrid_tiers_from_target_file(target_file.name)
+        for tier in tiers:
+            textgrid_metadata_tiers.add(tier)
+
+        # Get truth settings for target file
         truth_setting_ids: list[int] = []
         for truth_setting in target_file.truth_settings:
            cur.execute("SELECT truth_setting.id FROM truth_setting WHERE ? = truth_setting.setting",
                        (truth_setting.to_json(),))
            truth_setting_ids.append(cur.fetchone()[0])
 
-
-
+        # Get speaker_id for target file
+        cur.execute("SELECT speaker.id FROM speaker WHERE ? = speaker.parent",
+                    (Path(target_file.name).parent.as_posix(),))
+        result = cur.fetchone()
+        speaker_id = None
+        if result is not None:
+            speaker_id = result[0]
+
+        # Add entry
+        cur.execute("INSERT INTO target_file (name, samples, level_type, speaker_id) VALUES (?, ?, ?, ?)",
+                    (target_file.name, target_file.samples, target_file.level_type, speaker_id))
         target_file_id = cur.lastrowid
         for truth_setting_id in truth_setting_ids:
            cur.execute("INSERT INTO target_file_truth_setting (target_file_id, truth_setting_id) VALUES (?, ?)",
                        (target_file_id, truth_setting_id))
 
+    # Update textgrid_metadata_tiers in the top table
+    con.execute("UPDATE top SET textgrid_metadata_tiers=? WHERE top.id = ?",
+                (json.dumps(sorted(textgrid_metadata_tiers)), 1))
+
     con.commit()
     con.close()
 
@@ -304,8 +313,8 @@ def populate_impulse_response_file_table(location: str, impulse_response_files:
 def update_mixid_width(location: str, num_mixtures: int, test: bool = False) -> None:
     """Update the mixid width
     """
-    from sonusai.utils import max_text_width
     from .mixdb import db_connection
+    from sonusai.utils import max_text_width
 
     con = db_connection(location=location, readonly=False, test=test)
     con.execute("UPDATE top SET mixid_width=? WHERE top.id = ?", (max_text_width(num_mixtures), 1))
@@ -367,8 +376,8 @@ def update_mixture(mixdb: MixtureDatabase,
     """
     from .audio import get_next_noise
     from .augmentation import apply_gain
-    from .helpers import get_target
     from .datatypes import GenMixData
+    from .helpers import get_target
 
     mixture, targets_audio = _initialize_targets_audio(mixdb, mixture)
 
@@ -917,3 +926,95 @@ def get_all_snrs_from_config(config: dict) -> list[UniversalSNRGenerator]:
 
     return ([UniversalSNRGenerator(is_random=False, _raw_value=snr) for snr in config['snrs']] +
             [UniversalSNRGenerator(is_random=True, _raw_value=snr) for snr in config['random_snrs']])
+
+
+def _get_textgrid_tiers_from_target_file(target_file: str) -> list[str]:
+    from pathlib import Path
+
+    from praatio import textgrid
+
+    from sonusai.mixture import tokenized_expand
+
+    textgrid_file = Path(tokenized_expand(target_file)[0]).with_suffix('.TextGrid')
+    if not textgrid_file.exists():
+        return []
+
+    tg = textgrid.openTextgrid(str(textgrid_file), includeEmptyIntervals=False)
+
+    return sorted(tg.tierNames)
+
+
+def _populate_speaker_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
+    """Populate speaker table
+    """
+    import json
+    from pathlib import Path
+
+    import yaml
+
+    from .mixdb import db_connection
+    from .tokenized_shell_vars import tokenized_expand
+
+    # Determine columns for speaker table
+    all_parents = set([Path(target_file.name).parent for target_file in target_files])
+    speaker_parents = (parent for parent in all_parents if Path(tokenized_expand(parent / 'speaker.yml')[0]).exists())
+
+    speakers: dict[Path, dict[str, str]] = {}
+    for parent in sorted(speaker_parents):
+        with open(tokenized_expand(parent / 'speaker.yml')[0], 'r') as f:
+            speakers[parent] = yaml.safe_load(f)
+
+    new_columns: list[str] = []
+    for keys in speakers.keys():
+        for column in speakers[keys].keys():
+            new_columns.append(column)
+    new_columns = sorted(set(new_columns))
+
+    con = db_connection(location=location, readonly=False, test=test)
+
+    for new_column in new_columns:
+        con.execute(f'ALTER TABLE speaker ADD COLUMN {new_column} TEXT')
+
+    # Populate speaker table
+    speaker_rows: list[tuple[str, ...]] = []
+    for key in speakers.keys():
+        entry = (speakers[key].get(column, None) for column in new_columns)
+        speaker_rows.append((key.as_posix(), *entry))
+
+    column_ids = ', '.join(['parent', *new_columns])
+    column_values = ', '.join(['?'] * (len(new_columns) + 1))
+    con.executemany(f'INSERT INTO speaker ({column_ids}) VALUES ({column_values})', speaker_rows)
+
+    con.execute("CREATE INDEX speaker_parent_idx ON speaker (parent)")
+
+    # Update speaker_metadata_tiers in the top table
+    tiers = [description[0] for description in con.execute("SELECT * FROM speaker").description if
+             description[0] not in ('id', 'parent')]
+    con.execute("UPDATE top SET speaker_metadata_tiers=? WHERE top.id = ?", (json.dumps(tiers), 1))
+
+    if 'speaker_id' in tiers:
+        con.execute("CREATE INDEX speaker_speaker_id_idx ON speaker (speaker_id)")
+
+    con.commit()
+    con.close()
+
+
+def _populate_truth_setting_table(location: str, target_files: TargetFiles, test: bool = False) -> None:
+    """Populate truth_setting table
+    """
+    from .mixdb import db_connection
+
+    con = db_connection(location=location, readonly=False, test=test)
+
+    # Populate truth_setting table
+    truth_settings: list[str] = []
+    for truth_setting in [truth_setting for target_file in target_files
+                          for truth_setting in target_file.truth_settings]:
+        ts = truth_setting.to_json()
+        if ts not in truth_settings:
+            truth_settings.append(ts)
+    con.executemany("INSERT INTO truth_setting (setting) VALUES (?)",
+                    [(item,) for item in truth_settings])
+
+    con.commit()
+    con.close()
sonusai/mixture/helpers.py
CHANGED
@@ -1,5 +1,7 @@
 from typing import Any
+from typing import Optional
 
+from praatio.utilities.constants import Interval
 from pyaaware import ForwardTransform
 from pyaaware import InverseTransform
 
@@ -18,11 +20,14 @@ from sonusai.mixture.datatypes import Mixture
 from sonusai.mixture.datatypes import NoiseFile
 from sonusai.mixture.datatypes import NoiseFiles
 from sonusai.mixture.datatypes import Segsnr
+from sonusai.mixture.datatypes import SpeechMetadata
 from sonusai.mixture.datatypes import Target
 from sonusai.mixture.datatypes import TargetFiles
 from sonusai.mixture.datatypes import Targets
 from sonusai.mixture.datatypes import TransformConfig
 from sonusai.mixture.datatypes import Truth
+from sonusai.mixture.db_datatypes import MixtureRecord
+from sonusai.mixture.db_datatypes import TargetRecord
 from sonusai.mixture.mixdb import MixtureDatabase
 
 
@@ -123,6 +128,35 @@ def write_mixture_data(mixdb: MixtureDatabase,
             f.create_dataset(name=item[0], data=item[1])
 
 
+def mixture_all_speech_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> list[dict[str, SpeechMetadata]]:
+    """Get a list of all speech metadata for the given mixture
+    """
+    results: list[dict[str, SpeechMetadata]] = []
+    for target in mixture.targets:
+        data: dict[str, SpeechMetadata] = {}
+        for tier in mixdb.speaker_metadata_tiers:
+            data[tier] = mixdb.speaker(mixdb.target_file(target.file_id).speaker_id, tier)
+
+        for tier in mixdb.textgrid_metadata_tiers:
+            item = get_textgrid_tier_from_target_file(mixdb.target_file(target.file_id).name, tier)
+            if isinstance(item, list):
+                # Check for tempo augmentation and adjust Interval start and end data as needed
+                entries = []
+                for entry in item:
+                    if target.augmentation.tempo is not None:
+                        entries.append(Interval(entry.start / target.augmentation.tempo,
+                                                entry.end / target.augmentation.tempo,
+                                                entry.label))
+                    else:
+                        entries.append(entry)
+                data[tier] = entries
+            else:
+                data[tier] = item
+        results.append(data)
+
+    return results
+
+
 def mixture_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> str:
     """Create a string of metadata for a Mixture
 
@@ -131,6 +165,7 @@ def mixture_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> str:
     :return: String of metadata
     """
     metadata = ''
+    speech_metadata = mixture_all_speech_metadata(mixdb, mixture)
     for mi, target in enumerate(mixture.targets):
         target_file = mixdb.target_file(target.file_id)
         target_augmentation = target.augmentation
@@ -147,7 +182,8 @@ def mixture_metadata(mixdb: MixtureDatabase, mixture: Mixture) -> str:
             metadata += f'target {mi} truth index {tsi}: {truth_settings[tsi].index}\n'
             metadata += f'target {mi} truth function {tsi}: {truth_settings[tsi].function}\n'
             metadata += f'target {mi} truth config {tsi}: {truth_settings[tsi].config}\n'
-
+        for key in speech_metadata[mi].keys():
+            metadata += f'target {mi} speech {key}: {speech_metadata[mi][key]}\n'
     noise = mixdb.noise_file(mixture.noise.file_id)
     noise_augmentation = mixture.noise.augmentation
     metadata += f'noise name: {noise.name}\n'
@@ -194,7 +230,7 @@ def from_mixture(mixture: Mixture) -> tuple[str, int, str, int, float, bool, flo
                 mixture.target_snr_gain)
 
 
-def to_mixture(entry:
+def to_mixture(entry: MixtureRecord, targets: Targets) -> Mixture:
     import json
 
     from sonusai.utils import dataclass_from_dict
@@ -204,32 +240,32 @@ def to_mixture(entry: tuple[str, int, str, int, float, bool, float, int, int, in
     from .datatypes import UniversalSNR
 
     return Mixture(targets=targets,
-                   name=entry
-                   noise=Noise(file_id=entry
-                               augmentation=dataclass_from_dict(Augmentation, json.loads(entry
-                               offset=entry
-                   noise_snr_gain=entry
-                   snr=UniversalSNR(is_random=entry
-                   samples=entry
-                   spectral_mask_id=entry
-                   spectral_mask_seed=entry
-                   target_snr_gain=entry
+                   name=entry.name,
+                   noise=Noise(file_id=entry.noise_file_id,
+                               augmentation=dataclass_from_dict(Augmentation, json.loads(entry.noise_augmentation)),
+                               offset=entry.noise_offset),
+                   noise_snr_gain=entry.noise_snr_gain,
+                   snr=UniversalSNR(is_random=entry.random_snr, value=entry.snr),
+                   samples=entry.samples,
+                   spectral_mask_id=entry.spectral_mask_id,
+                   spectral_mask_seed=entry.spectral_mask_seed,
+                   target_snr_gain=entry.target_snr_gain)
 
 
 def from_target(target: Target) -> tuple[int, str, float]:
     return target.file_id, target.augmentation.to_json(), target.gain
 
 
-def to_target(entry:
+def to_target(entry: TargetRecord) -> Target:
     import json
 
     from sonusai.utils import dataclass_from_dict
     from .datatypes import Augmentation
     from .datatypes import Target
 
-    return Target(file_id=entry
-                  augmentation=dataclass_from_dict(Augmentation, json.loads(entry
-                  gain=entry
+    return Target(file_id=entry.file_id,
+                  augmentation=dataclass_from_dict(Augmentation, json.loads(entry.augmentation)),
+                  gain=entry.gain)
 
 
 def read_mixture_data(name: str, items: list[str] | str) -> Any:
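The positional tuple unpacking in to_mixture/to_target is replaced by attribute access on record types from the new db_datatypes.py, whose contents this diff does not show. As a rough mental model only, assuming TargetRecord is a simple named record carrying file_id, augmentation, and gain (the field names come from the attribute accesses above; the record shape itself is an assumption):

    import json
    from typing import NamedTuple


    class TargetRecord(NamedTuple):
        # Hypothetical stand-in for sonusai.mixture.db_datatypes.TargetRecord.
        file_id: int
        augmentation: str  # JSON-encoded Augmentation, as consumed by to_target() above
        gain: float


    row = TargetRecord(file_id=3, augmentation=json.dumps({'tempo': 1.25}), gain=0.5)
    print(row.file_id, json.loads(row.augmentation), row.gain)  # 3 {'tempo': 1.25} 0.5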
@@ -582,3 +618,26 @@ def augmented_noise_length(noise_file: NoiseFile, noise_augmentation: Augmentati
 
     return estimate_augmented_length_from_length(length=noise_file.samples,
                                                  tempo=noise_augmentation.tempo)
+
+
+def get_textgrid_tier_from_target_file(target_file: str, tier: str) -> Optional[SpeechMetadata]:
+    from pathlib import Path
+
+    from praatio import textgrid
+
+    from .tokenized_shell_vars import tokenized_expand
+
+    textgrid_file = Path(tokenized_expand(target_file)[0]).with_suffix('.TextGrid')
+    if not textgrid_file.exists():
+        return None
+
+    tg = textgrid.openTextgrid(str(textgrid_file), includeEmptyIntervals=False)
+
+    if tier not in tg.tierNames:
+        return None
+
+    entries = tg.getTier(tier).entries
+    if len(entries) > 1:
+        return list(entries)
+    else:
+        return entries[0].label
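A small sketch of the tempo handling in mixture_all_speech_metadata: praatio Interval entries carry start and end times in seconds of the original recording, so when a target is tempo-augmented the boundaries are divided by the tempo factor. The interval values below are invented for illustration.

    from praatio.utilities.constants import Interval

    # Hypothetical word-tier entries read from a .TextGrid sitting next to a target file.
    entries = [Interval(0.00, 0.40, 'hello'), Interval(0.45, 0.90, 'world')]

    tempo = 1.25  # from the target's augmentation rule; 25 % faster playback
    adjusted = [Interval(e.start / tempo, e.end / tempo, e.label) for e in entries]
    # The intervals now span roughly 0.00-0.32 s and 0.36-0.72 s.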
sonusai/mixture/mapped_snr_f.py
CHANGED
@@ -7,35 +7,82 @@ def calculate_snr_f_statistics(truth_f: np.ndarray) -> tuple[np.ndarray, np.ndar
     For now, includes mean and standard deviation of the raw values (usually energy)
     and mean and standard deviation of the dB values (10 * log10).
     """
-
+    return (
+        calculate_snr_mean(truth_f),
+        calculate_snr_std(truth_f),
+        calculate_snr_db_mean(truth_f),
+        calculate_snr_db_std(truth_f),
+    )
 
-    snr_mean = np.zeros(classes, dtype=np.float32)
-    snr_std = np.zeros(classes, dtype=np.float32)
-    snr_db_mean = np.zeros(classes, dtype=np.float32)
-    snr_db_std = np.zeros(classes, dtype=np.float32)
 
-
+def calculate_snr_mean(truth_f: np.ndarray) -> np.ndarray:
+    """Calculate mean of snr_f truth data."""
+    snr_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
+
+    for c in range(truth_f.shape[1]):
         tmp_truth = truth_f[:, c]
         tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
 
         if len(tmp) == 0:
             snr_mean[c] = -np.inf
-            snr_std[c] = -np.inf
         else:
             snr_mean[c] = np.mean(tmp)
+
+    return snr_mean
+
+
+def calculate_snr_std(truth_f: np.ndarray) -> np.ndarray:
+    """Calculate standard deviation of snr_f truth data."""
+    snr_std = np.zeros(truth_f.shape[1], dtype=np.float32)
+
+    for c in range(truth_f.shape[1]):
+        tmp_truth = truth_f[:, c]
+        tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
+
+        if len(tmp) == 0:
+            snr_std[c] = -np.inf
+        else:
             snr_std[c] = np.std(tmp, ddof=1)
 
+    return snr_std
+
+
+def calculate_snr_db_mean(truth_f: np.ndarray) -> np.ndarray:
+    """Calculate dB mean of snr_f truth data."""
+    snr_db_mean = np.zeros(truth_f.shape[1], dtype=np.float32)
+
+    for c in range(truth_f.shape[1]):
+        tmp_truth = truth_f[:, c]
+        tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
+
         tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
         tmp2 = tmp2[np.isfinite(tmp2)]
 
         if len(tmp2) == 0:
             snr_db_mean[c] = -np.inf
-            snr_db_std[c] = -np.inf
         else:
             snr_db_mean[c] = np.mean(tmp2)
+
+    return snr_db_mean
+
+
+def calculate_snr_db_std(truth_f: np.ndarray) -> np.ndarray:
+    """Calculate dB standard deviation of snr_f truth data."""
+    snr_db_std = np.zeros(truth_f.shape[1], dtype=np.float32)
+
+    for c in range(truth_f.shape[1]):
+        tmp_truth = truth_f[:, c]
+        tmp = tmp_truth[np.isfinite(tmp_truth)].astype(np.double)
+
+        tmp2 = 10 * np.ma.log10(tmp).filled(-np.inf)
+        tmp2 = tmp2[np.isfinite(tmp2)]
+
+        if len(tmp2) == 0:
+            snr_db_std[c] = -np.inf
+        else:
             snr_db_std[c] = np.std(tmp2, ddof=1)
 
-    return
+    return snr_db_std
 
 
 def calculate_mapped_snr_f(truth_f: np.ndarray, snr_db_mean: np.ndarray, snr_db_std: np.ndarray) -> np.ndarray:
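The mapped_snr_f refactor splits one loop that filled four arrays into four single-purpose helpers sharing a convention: truth_f has shape (frames, classes) and each helper returns one value per class. A toy call, assuming the functions are importable from sonusai.mixture.mapped_snr_f as laid out above:

    import numpy as np

    from sonusai.mixture.mapped_snr_f import calculate_snr_f_statistics

    # Four frames of made-up energy values for two classes; non-finite entries are ignored.
    truth_f = np.array([[1.0, 0.1],
                        [2.0, np.inf],
                        [4.0, 0.4],
                        [8.0, 0.2]], dtype=np.float32)

    snr_mean, snr_std, snr_db_mean, snr_db_std = calculate_snr_f_statistics(truth_f)
    print(snr_mean.shape)  # (2,) - one statistic per class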
|