torchrir-0.1.0-py3-none-any.whl

@@ -0,0 +1,27 @@
+ """Dataset helpers for torchrir."""
+
+ from .base import BaseDataset, SentenceLike
+ from .utils import choose_speakers, load_dataset_sources
+ from .template import TemplateDataset, TemplateSentence
+
+ from .cmu_arctic import (
+     CmuArcticDataset,
+     CmuArcticSentence,
+     list_cmu_arctic_speakers,
+     load_wav_mono,
+     save_wav,
+ )
+
+ __all__ = [
+     "BaseDataset",
+     "CmuArcticDataset",
+     "CmuArcticSentence",
+     "choose_speakers",
+     "list_cmu_arctic_speakers",
+     "SentenceLike",
+     "load_dataset_sources",
+     "load_wav_mono",
+     "save_wav",
+     "TemplateDataset",
+     "TemplateSentence",
+ ]
@@ -0,0 +1,27 @@
+ """Dataset protocol definitions."""
+
+ from __future__ import annotations
+
+ from typing import Protocol, Sequence, Tuple
+
+ import torch
+
+
+ class SentenceLike(Protocol):
+     """Minimal sentence interface for dataset entries."""
+
+     utterance_id: str
+     text: str
+
+
+ class BaseDataset(Protocol):
+     """Protocol for datasets used in torchrir examples and tools."""
+
+     def list_speakers(self) -> list[str]:
+         """Return available speaker IDs."""
+
+     def available_sentences(self) -> Sequence[SentenceLike]:
+         """Return sentence entries that have audio available."""
+
+     def load_wav(self, utterance_id: str) -> Tuple[torch.Tensor, int]:
+         """Load audio for an utterance and return (audio, sample_rate)."""
@@ -0,0 +1,204 @@
+ """CMU ARCTIC dataset helpers."""
+
+ from __future__ import annotations
+
+ import logging
+ import tarfile
+ import urllib.request
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List, Tuple
+
+ import torch
+
+ BASE_URL = "http://www.festvox.org/cmu_arctic/packed"
+ VALID_SPEAKERS = {
+     "aew",
+     "ahw",
+     "aup",
+     "awb",
+     "axb",
+     "bdl",
+     "clb",
+     "eey",
+     "fem",
+     "gka",
+     "jmk",
+     "ksp",
+     "ljm",
+     "lnh",
+     "rms",
+     "rxr",
+     "slp",
+     "slt",
+ }
+
+ logger = logging.getLogger(__name__)
+
+
+ def list_cmu_arctic_speakers() -> List[str]:
+     """Return supported CMU ARCTIC speaker IDs."""
+     return sorted(VALID_SPEAKERS)
+
+
+ @dataclass
+ class CmuArcticSentence:
+     """Sentence metadata from CMU ARCTIC."""
+     utterance_id: str
+     text: str
+
+
+ class CmuArcticDataset:
+     def __init__(self, root: Path, speaker: str = "bdl", download: bool = False) -> None:
+         """Initialize a CMU ARCTIC dataset handle.
+
+         Args:
+             root: Root directory where the dataset is stored.
+             speaker: Speaker ID (e.g., "bdl").
+             download: Download and extract if missing.
+         """
+         if speaker not in VALID_SPEAKERS:
+             raise ValueError(f"unsupported speaker: {speaker}")
+         self.root = Path(root)
+         self.speaker = speaker
+         self._base_dir = self.root / "ARCTIC"
+         self._archive_name = f"cmu_us_{speaker}_arctic.tar.bz2"
+         self._dataset_dir = self._base_dir / f"cmu_us_{speaker}_arctic"
+
+         if download:
+             self._download_and_extract()
+
+         if not self._dataset_dir.exists():
+             raise FileNotFoundError(
+                 "dataset not found; run with download=True or place the archive under "
+                 f"{self._base_dir}"
+             )
+
+     @property
+     def wav_dir(self) -> Path:
+         """Return the directory containing wav files."""
+         return self._dataset_dir / "wav"
+
+     @property
+     def text_path(self) -> Path:
+         """Return the path to txt.done.data."""
+         return self._dataset_dir / "etc" / "txt.done.data"
+
+     def _download_and_extract(self) -> None:
+         """Download and extract the speaker archive if needed."""
+         self._base_dir.mkdir(parents=True, exist_ok=True)
+         archive_path = self._base_dir / self._archive_name
+         url = f"{BASE_URL}/{self._archive_name}"
+
+         if not archive_path.exists():
+             logger.info("Downloading %s", url)
+             _download(url, archive_path)
+         if not self._dataset_dir.exists():
+             logger.info("Extracting %s", archive_path)
+             try:
+                 with tarfile.open(archive_path, "r:bz2") as tar:
+                     tar.extractall(self._base_dir)
+             except (tarfile.ReadError, EOFError, OSError) as exc:
+                 logger.warning("Extraction failed (%s); re-downloading.", exc)
+                 if archive_path.exists():
+                     archive_path.unlink()
+                 _download(url, archive_path)
+                 with tarfile.open(archive_path, "r:bz2") as tar:
+                     tar.extractall(self._base_dir)
+
+     def sentences(self) -> List[CmuArcticSentence]:
+         """Parse all sentence metadata."""
+         sentences: List[CmuArcticSentence] = []
+         with self.text_path.open("r", encoding="utf-8") as f:
+             for line in f:
+                 line = line.strip()
+                 if not line:
+                     continue
+                 utt, text = _parse_text_line(line)
+                 sentences.append(CmuArcticSentence(utterance_id=utt, text=text))
+         return sentences
+
+     def available_sentences(self) -> List[CmuArcticSentence]:
+         """Return sentences that have a corresponding wav file."""
+         wav_ids = {p.stem for p in self.wav_dir.glob("*.wav")}
+         return [s for s in self.sentences() if s.utterance_id in wav_ids]
+
+     def list_speakers(self) -> List[str]:
+         """Return available speaker IDs."""
+         return list_cmu_arctic_speakers()
+
+     def wav_path(self, utterance_id: str) -> Path:
+         """Return the wav path for an utterance ID."""
+         return self.wav_dir / f"{utterance_id}.wav"
+
+     def load_wav(self, utterance_id: str) -> Tuple[torch.Tensor, int]:
+         """Load a mono wav for the given utterance ID."""
+         path = self.wav_path(utterance_id)
+         return load_wav_mono(path)
+
+
+ def _download(url: str, dest: Path, retries: int = 1) -> None:
+     """Download a file with retries, writing through a temp file."""
+     for attempt in range(retries + 1):
+         try:
+             _stream_download(url, dest)
+             return
+         except Exception as exc:
+             if dest.exists():
+                 dest.unlink()
+             if attempt >= retries:
+                 raise
+             logger.warning("Download failed (%s); retrying...", exc)
+
+
+ def _stream_download(url: str, dest: Path) -> None:
+     """Stream a URL to disk via a temporary .part file and verify its size."""
+     tmp_path = dest.with_suffix(dest.suffix + ".part")
+     if tmp_path.exists():
+         tmp_path.unlink()
+
+     with urllib.request.urlopen(url) as response:
+         total = response.length or 0
+         downloaded = 0
+         chunk_size = 1024 * 1024
+         with tmp_path.open("wb") as f:
+             while True:
+                 chunk = response.read(chunk_size)
+                 if not chunk:
+                     break
+                 f.write(chunk)
+                 downloaded += len(chunk)
+     if total > 0 and downloaded != total:
+         raise IOError(f"incomplete download: {downloaded} of {total} bytes")
+     tmp_path.replace(dest)
+
+
+ def _parse_text_line(line: str) -> Tuple[str, str]:
+     """Parse a txt.done.data line into (utterance_id, text)."""
+     left, _, right = line.partition('"')
+     utterance = left.replace("(", "").strip().split()[0]
+     text = right.rsplit('"', 1)[0]
+     return utterance, text
+
+
+ def load_wav_mono(path: Path) -> Tuple[torch.Tensor, int]:
+     """Load a wav file and return mono audio and sample rate."""
+     import soundfile as sf
+
+     audio, sample_rate = sf.read(str(path), dtype="float32", always_2d=True)
+     audio_t = torch.from_numpy(audio)
+     if audio_t.shape[1] > 1:
+         audio_t = audio_t.mean(dim=1)
+     else:
+         audio_t = audio_t.squeeze(1)
+     return audio_t, sample_rate
+
+
+ def save_wav(path: Path, audio: torch.Tensor, sample_rate: int) -> None:
+     """Save a mono or multi-channel wav to disk."""
+     import soundfile as sf
+
+     audio = audio.detach().cpu().clamp(-1.0, 1.0).to(torch.float32)
+     if audio.ndim == 2 and audio.shape[0] <= 8:
+         audio = audio.transpose(0, 1)
+     sf.write(str(path), audio.numpy(), sample_rate)
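
Note (editor's sketch, not part of the packaged diff): a minimal end-to-end use of
CmuArcticDataset, assuming the package layout implied by the imports above
(torchrir.datasets); the cache directory is arbitrary:

    from pathlib import Path

    from torchrir.datasets import CmuArcticDataset

    dataset = CmuArcticDataset(root=Path("./data"), speaker="bdl", download=True)
    sentences = dataset.available_sentences()
    audio, fs = dataset.load_wav(sentences[0].utterance_id)
    print(sentences[0].text, tuple(audio.shape), fs)
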
@@ -0,0 +1,65 @@
+ """Dataset template for future extensions.
+
+ Work in progress:
+     This module is a placeholder for future dataset integrations. The goal is
+     to provide a consistent interface for downloading, caching, enumerating
+     speakers/utterances, and loading audio in a reproducible way.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List, Sequence, Tuple
+
+ import torch
+
+ from .base import BaseDataset, SentenceLike
+
+
+ @dataclass
+ class TemplateSentence:
+     """Minimal sentence metadata for a template dataset."""
+
+     utterance_id: str
+     text: str
+
+
+ class TemplateDataset(BaseDataset):
+     """Work-in-progress template dataset implementation.
+
+     Goal:
+         Implement concrete dataset handlers by filling in download logic,
+         metadata parsing, and audio loading while keeping the BaseDataset
+         protocol intact.
+     """
+
+     def __init__(self, root: Path, speaker: str = "default", download: bool = False) -> None:
+         self.root = Path(root)
+         self.speaker = speaker
+         if download:
+             raise NotImplementedError(
+                 "download is not implemented yet. Intended to fetch and cache "
+                 "dataset archives under root."
+             )
+
+     def list_speakers(self) -> List[str]:
+         """Return available speaker IDs."""
+         return ["default"]
+
+     def available_sentences(self) -> Sequence[SentenceLike]:
+         """Return sentence entries that have audio available.
+
+         Work in progress:
+             Intended to parse dataset metadata and filter to utterances that
+             have corresponding audio files on disk.
+         """
+         raise NotImplementedError("available_sentences is not implemented yet")
+
+     def load_wav(self, utterance_id: str) -> Tuple[torch.Tensor, int]:
+         """Load audio for an utterance and return (audio, sample_rate).
+
+         Work in progress:
+             Intended to load audio from local cache and return mono float32.
+         """
+         raise NotImplementedError("load_wav is not implemented yet")
@@ -0,0 +1,74 @@
+ """Dataset-agnostic utilities."""
+
+ from __future__ import annotations
+
+ import random
+ from typing import Callable, List, Optional, Sequence, Tuple
+
+ import torch
+
+ from .base import BaseDataset, SentenceLike
+
+
+ def choose_speakers(dataset: BaseDataset, num_sources: int, rng: random.Random) -> List[str]:
+     """Select unique speakers for the requested number of sources."""
+     speakers = dataset.list_speakers()
+     if not speakers:
+         raise RuntimeError("no speakers available")
+     if num_sources > len(speakers):
+         raise ValueError(f"num_sources must be <= {len(speakers)} for unique speakers")
+     return rng.sample(speakers, num_sources)
+
+
+ def load_dataset_sources(
+     *,
+     dataset_factory: Callable[[Optional[str]], BaseDataset],
+     num_sources: int,
+     duration_s: float,
+     rng: random.Random,
+ ) -> Tuple[torch.Tensor, int, List[Tuple[str, List[str]]]]:
+     """Load and concatenate utterances for each speaker into fixed-length signals."""
+     dataset0 = dataset_factory(None)
+     speakers = choose_speakers(dataset0, num_sources, rng)
+     signals: List[torch.Tensor] = []
+     info: List[Tuple[str, List[str]]] = []
+     fs: int | None = None
+     target_samples: int | None = None
+
+     for speaker in speakers:
+         dataset = dataset_factory(speaker)
+         sentences: Sequence[SentenceLike] = dataset.available_sentences()
+         if not sentences:
+             raise RuntimeError(f"no sentences found for speaker {speaker}")
+
+         utterance_ids: List[str] = []
+         segments: List[torch.Tensor] = []
+         total = 0
+         sentences = list(sentences)
+         rng.shuffle(sentences)
+         idx = 0
+
+         while target_samples is None or total < target_samples:
+             if idx >= len(sentences):
+                 rng.shuffle(sentences)
+                 idx = 0
+             sentence = sentences[idx]
+             idx += 1
+             audio, sample_rate = dataset.load_wav(sentence.utterance_id)
+             if fs is None:
+                 fs = sample_rate
+                 target_samples = int(duration_s * fs)
+             elif sample_rate != fs:
+                 raise ValueError(
+                     f"sample rate mismatch: expected {fs}, got {sample_rate} for {speaker}"
+                 )
+             segments.append(audio)
+             utterance_ids.append(sentence.utterance_id)
+             total += audio.numel()
+
+         signal = torch.cat(segments, dim=0)[:target_samples]
+         signals.append(signal)
+         info.append((speaker, utterance_ids))
+
+     stacked = torch.stack(signals, dim=0)
+     return stacked, int(fs), info
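
Note (editor's sketch, not part of the packaged diff): load_dataset_sources takes a
factory so it can build one dataset per chosen speaker; the factory is first called
with None just to enumerate speakers. Assuming the CMU ARCTIC dataset from this diff
and the torchrir.datasets import path:

    import random

    from torchrir.datasets import CmuArcticDataset, load_dataset_sources

    signals, fs, info = load_dataset_sources(
        dataset_factory=lambda spk: CmuArcticDataset(
            root="./data", speaker=spk or "bdl", download=True
        ),
        num_sources=2,
        duration_s=5.0,
        rng=random.Random(0),
    )
    # signals: (2, int(5.0 * fs)) tensor; info: [(speaker, [utterance_ids]), ...]
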
@@ -0,0 +1,33 @@
+ """Directivity pattern utilities."""
+
+ from __future__ import annotations
+
+ import torch
+ from torch import Tensor
+
+
+ def directivity_gain(pattern: str, cos_theta: Tensor) -> Tensor:
+     """Compute directivity gain for a pattern given cos(theta)."""
+     pattern = pattern.lower()
+     if pattern in ("omni", "omnidirectional"):
+         return torch.ones_like(cos_theta)
+     if pattern in ("homni", "halfomni", "half-omni"):
+         return (cos_theta > 0).to(cos_theta.dtype)
+     if pattern in ("subcardioid", "subcard"):
+         return 0.75 + 0.25 * cos_theta
+     if pattern in ("cardioid", "card"):
+         return 0.5 + 0.5 * cos_theta
+     if pattern in ("hypercardioid", "hypcard"):
+         return 0.25 + 0.75 * cos_theta
+     if pattern in ("bidir", "bidirectional", "figure8", "figure-8"):
+         return cos_theta
+     raise ValueError(f"unsupported directivity pattern: {pattern}")
+
+
+ def split_directivity(directivity: str | tuple[str, str]) -> tuple[str, str]:
+     """Normalize a directivity specification into (source, mic)."""
+     if isinstance(directivity, (list, tuple)):
+         if len(directivity) != 2:
+             raise ValueError("directivity tuple must have length 2")
+         return directivity[0], directivity[1]
+     return directivity, directivity
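
Note (editor's sketch, not part of the packaged diff): apart from half-omni, the
gains above are the standard first-order family a + (1 - a) * cos(theta), with
a = 1 (omni), 0.75 (subcardioid), 0.5 (cardioid), 0.25 (hypercardioid), 0 (figure-8).
The torchrir.directivity import path is assumed:

    import torch

    from torchrir.directivity import directivity_gain, split_directivity

    cos_theta = torch.tensor([1.0, 0.0, -1.0])  # on-axis, 90 degrees, rear
    print(directivity_gain("cardioid", cos_theta))  # tensor([1.0000, 0.5000, 0.0000])
    print(split_directivity("omni"))                # ('omni', 'omni')
    print(split_directivity(("card", "omni")))      # ('card', 'omni')
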
torchrir/dynamic.py ADDED
@@ -0,0 +1,60 @@
+ """Dynamic convolution utilities.
+
+ DynamicConvolver is the public API for time-varying convolution. Lower-level
+ helpers live in signal.py and are not part of the stable surface.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Optional
+
+ import torch
+ from torch import Tensor
+
+ from .signal import _ensure_dynamic_rirs, _ensure_signal
+
+
+ @dataclass(frozen=True)
+ class DynamicConvolver:
+     """Convolver for time-varying RIRs."""
+
+     mode: str = "trajectory"
+     hop: Optional[int] = None
+     timestamps: Optional[Tensor] = None
+     fs: Optional[float] = None
+
+     def __call__(self, signal: Tensor, rirs: Tensor) -> Tensor:
+         return self.convolve(signal, rirs)
+
+     def convolve(self, signal: Tensor, rirs: Tensor) -> Tensor:
+         """Convolve signals with time-varying RIRs."""
+         if self.mode not in ("trajectory", "hop"):
+             raise ValueError("mode must be 'trajectory' or 'hop'")
+         if self.mode == "hop":
+             if self.hop is None:
+                 raise ValueError("hop must be provided for hop mode")
+             return _convolve_dynamic_hop(signal, rirs, self.hop)
+         return _convolve_dynamic_trajectory(signal, rirs, timestamps=self.timestamps, fs=self.fs)
+
+
+ def _convolve_dynamic_hop(signal: Tensor, rirs: Tensor, hop: int) -> Tensor:
+     from .signal import _convolve_dynamic_rir_hop
+
+     signal = _ensure_signal(signal)
+     rirs = _ensure_dynamic_rirs(rirs, signal)
+     return _convolve_dynamic_rir_hop(signal, rirs, hop)
+
+
+ def _convolve_dynamic_trajectory(
+     signal: Tensor,
+     rirs: Tensor,
+     *,
+     timestamps: Optional[Tensor],
+     fs: Optional[float],
+ ) -> Tensor:
+     from .signal import _convolve_dynamic_rir_trajectory
+
+     signal = _ensure_signal(signal)
+     rirs = _ensure_dynamic_rirs(rirs, signal)
+     return _convolve_dynamic_rir_trajectory(signal, rirs, timestamps=timestamps, fs=fs)
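
Note (editor's sketch, not part of the packaged diff): hop mode advances to the next
RIR every `hop` samples. The exact RIR tensor layout is enforced by
_ensure_dynamic_rirs in signal.py, which this diff does not include, so the
(num_frames, rir_length) shape below is an assumption:

    import torch

    from torchrir.dynamic import DynamicConvolver

    fs = 16000
    signal = torch.randn(fs)        # 1 s of audio
    rirs = torch.randn(10, 4096)    # assumed layout: one RIR per frame

    convolver = DynamicConvolver(mode="hop", hop=fs // 10)
    wet = convolver(signal, rirs)
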
@@ -0,0 +1,55 @@
+ """Logging helpers for torchrir."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, replace
+ import logging
+ from typing import Optional
+
+
+ @dataclass(frozen=True)
+ class LoggingConfig:
+     """Configuration for torchrir logging."""
+
+     level: str | int = "INFO"
+     format: str = "%(levelname)s:%(name)s:%(message)s"
+     datefmt: Optional[str] = None
+     propagate: bool = False
+
+     def resolve_level(self) -> int:
+         """Resolve level to a logging integer constant."""
+         if isinstance(self.level, int):
+             return self.level
+         if not isinstance(self.level, str):
+             raise TypeError("level must be str or int")
+         level = logging.getLevelName(self.level.upper())
+         if not isinstance(level, int):
+             raise ValueError(f"unknown log level: {self.level}")
+         return level
+
+     def replace(self, **kwargs) -> "LoggingConfig":
+         """Return a new config with updated fields."""
+         return replace(self, **kwargs)
+
+
+ def setup_logging(config: LoggingConfig, *, name: str = "torchrir") -> logging.Logger:
+     """Configure and return the base torchrir logger."""
+     logger = logging.getLogger(name)
+     level = config.resolve_level()
+     logger.setLevel(level)
+     logger.propagate = config.propagate
+     if not logger.handlers:
+         handler = logging.StreamHandler()
+         handler.setLevel(level)
+         handler.setFormatter(logging.Formatter(config.format, datefmt=config.datefmt))
+         logger.addHandler(handler)
+     return logger
+
+
+ def get_logger(name: Optional[str] = None) -> logging.Logger:
+     """Return a torchrir logger, namespaced under the torchrir root."""
+     if not name:
+         return logging.getLogger("torchrir")
+     if name.startswith("torchrir"):
+         return logging.getLogger(name)
+     return logging.getLogger(f"torchrir.{name}")