torchrir 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {torchrir-0.1.0 → torchrir-0.1.2}/PKG-INFO +60 -2
- torchrir-0.1.0/src/torchrir.egg-info/PKG-INFO → torchrir-0.1.2/README.md +57 -12
- torchrir-0.1.2/pyproject.toml +26 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/__init__.py +5 -0
- torchrir-0.1.2/src/torchrir/animation.py +172 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/config.py +11 -2
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/core.py +30 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/datasets/cmu_arctic.py +17 -2
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/datasets/utils.py +20 -2
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/dynamic.py +11 -2
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/logging_utils.py +17 -3
- torchrir-0.1.2/src/torchrir/metadata.py +216 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/plotting.py +113 -20
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/plotting_utils.py +15 -30
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/results.py +7 -1
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/room.py +30 -6
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/scene.py +6 -1
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/scene_utils.py +22 -4
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/signal.py +6 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/simulators.py +5 -1
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/utils.py +39 -7
- torchrir-0.1.0/README.md → torchrir-0.1.2/src/torchrir.egg-info/PKG-INFO +70 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir.egg-info/SOURCES.txt +2 -0
- torchrir-0.1.0/pyproject.toml +0 -18
- {torchrir-0.1.0 → torchrir-0.1.2}/LICENSE +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/NOTICE +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/setup.cfg +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/datasets/__init__.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/datasets/base.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/datasets/template.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir/directivity.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir.egg-info/dependency_links.txt +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir.egg-info/requires.txt +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/src/torchrir.egg-info/top_level.txt +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_compare_pyroomacoustics.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_core.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_device_parity.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_plotting.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_room.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_scene.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_signal.py +0 -0
- {torchrir-0.1.0 → torchrir-0.1.2}/tests/test_utils.py +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: torchrir
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Add your description here
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: PyTorch-based room impulse response (RIR) simulation toolkit for static and dynamic scenes.
|
|
5
|
+
Project-URL: Repository, https://github.com/taishi-n/torchrir
|
|
5
6
|
Requires-Python: >=3.10
|
|
6
7
|
Description-Content-Type: text/markdown
|
|
7
8
|
License-File: LICENSE
|
|
@@ -18,6 +19,22 @@ This project has been substantially assisted by AI using Codex.
|
|
|
18
19
|
## License
|
|
19
20
|
Apache-2.0. See `LICENSE` and `NOTICE`.
|
|
20
21
|
|
|
22
|
+
## Installation
|
|
23
|
+
```bash
|
|
24
|
+
pip install torchrir
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Current Capabilities
|
|
28
|
+
- ISM-based static and dynamic RIR simulation (2D/3D shoebox rooms).
|
|
29
|
+
- Directivity patterns: `omni`, `cardioid`, `hypercardioid`, `subcardioid`, `bidir` with orientation handling.
|
|
30
|
+
- Acoustic parameters: `beta` or `t60` (Sabine), optional diffuse tail via `tdiff`.
|
|
31
|
+
- Dynamic convolution via `DynamicConvolver` (`trajectory` or `hop` modes).
|
|
32
|
+
- GPU acceleration for ISM accumulation (CUDA/MPS; MPS disables LUT).
|
|
33
|
+
- Dataset utilities with CMU ARCTIC support and example pipelines.
|
|
34
|
+
- Plotting utilities for static and dynamic scenes.
|
|
35
|
+
- Metadata export helpers for time axis, DOA, and array attributes (JSON-ready).
|
|
36
|
+
- Unified CLI with JSON/YAML config and deterministic flag support.
|
|
37
|
+
|
|
21
38
|
## Example Usage
|
|
22
39
|
```bash
|
|
23
40
|
# CMU ARCTIC + static RIR (fixed sources/mics)
|
|
@@ -26,16 +43,22 @@ uv run python examples/static.py --plot
|
|
|
26
43
|
# Dynamic RIR demos
|
|
27
44
|
uv run python examples/dynamic_mic.py --plot
|
|
28
45
|
uv run python examples/dynamic_src.py --plot
|
|
46
|
+
uv run python examples/dynamic_mic.py --gif
|
|
47
|
+
uv run python examples/dynamic_src.py --gif
|
|
29
48
|
|
|
30
49
|
# Unified CLI
|
|
31
50
|
uv run python examples/cli.py --mode static --plot
|
|
32
51
|
uv run python examples/cli.py --mode dynamic_mic --plot
|
|
33
52
|
uv run python examples/cli.py --mode dynamic_src --plot
|
|
53
|
+
uv run python examples/cli.py --mode dynamic_mic --gif
|
|
54
|
+
uv run python examples/dynamic_mic.py --gif --gif-fps 12
|
|
34
55
|
|
|
35
56
|
# Config + deterministic
|
|
36
57
|
uv run python examples/cli.py --mode static --deterministic --seed 123 --config-out outputs/cli.json
|
|
37
58
|
uv run python examples/cli.py --config-in outputs/cli.json
|
|
38
59
|
```
|
|
60
|
+
GIF FPS is auto-derived from signal duration and RIR steps unless overridden with `--gif-fps`.
|
|
61
|
+
For 3D rooms, an additional `*_3d.gif` is saved.
|
|
39
62
|
YAML configs are supported when `PyYAML` is installed.
|
|
40
63
|
```bash
|
|
41
64
|
# YAML config
|
|
@@ -43,6 +66,24 @@ uv run python examples/cli.py --mode static --config-out outputs/cli.yaml
|
|
|
43
66
|
uv run python examples/cli.py --config-in outputs/cli.yaml
|
|
44
67
|
```
|
|
45
68
|
`examples/cli_example.yaml` provides a ready-to-use template.
|
|
69
|
+
Examples also save `*_metadata.json` alongside audio outputs.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from torchrir import DynamicConvolver, MicrophoneArray, Room, Source, simulate_rir
|
|
73
|
+
|
|
74
|
+
room = Room.shoebox(size=[6.0, 4.0, 3.0], fs=16000, beta=[0.9] * 6)
|
|
75
|
+
sources = Source.positions([[1.0, 2.0, 1.5]])
|
|
76
|
+
mics = MicrophoneArray.positions([[2.0, 2.0, 1.5]])
|
|
77
|
+
|
|
78
|
+
rir = simulate_rir(
|
|
79
|
+
room=room,
|
|
80
|
+
sources=sources,
|
|
81
|
+
mics=mics,
|
|
82
|
+
max_order=6,
|
|
83
|
+
tmax=0.3,
|
|
84
|
+
device="auto",
|
|
85
|
+
)
|
|
86
|
+
```
|
|
46
87
|
|
|
47
88
|
```python
|
|
48
89
|
from torchrir import DynamicConvolver
|
|
@@ -55,6 +96,20 @@ y = DynamicConvolver(mode="hop", hop=1024).convolve(signal, rirs)
|
|
|
55
96
|
```
|
|
56
97
|
Dynamic convolution is exposed via `DynamicConvolver` only (no legacy function wrappers).
|
|
57
98
|
|
|
99
|
+
## Limitations and Potential Errors
|
|
100
|
+
- Ray tracing and FDTD simulators are placeholders and raise `NotImplementedError`.
|
|
101
|
+
- `TemplateDataset` methods are not implemented and will raise `NotImplementedError`.
|
|
102
|
+
- `simulate_rir`/`simulate_dynamic_rir` require `max_order` (or `SimulationConfig.max_order`) and either `nsample` or `tmax`.
|
|
103
|
+
- Non-`omni` directivity requires orientation; mismatched shapes raise `ValueError`.
|
|
104
|
+
- `beta` must have 4 (2D) or 6 (3D) elements; invalid sizes raise `ValueError`.
|
|
105
|
+
- `simulate_dynamic_rir` requires `src_traj` and `mic_traj` to have matching time steps.
|
|
106
|
+
- Dynamic simulation currently loops per time step; very long trajectories can be slow.
|
|
107
|
+
- MPS disables the sinc LUT path (falls back to direct sinc), which can be slower and slightly different numerically.
|
|
108
|
+
- Deterministic mode is best-effort; some backends may still be non-deterministic.
|
|
109
|
+
- YAML configs require `PyYAML`; otherwise a `ModuleNotFoundError` is raised.
|
|
110
|
+
- CMU ARCTIC downloads require network access.
|
|
111
|
+
- GIF animation output requires Pillow (via matplotlib animation writer).
|
|
112
|
+
|
|
58
113
|
### Dataset-agnostic utilities
|
|
59
114
|
```python
|
|
60
115
|
from torchrir import (
|
|
@@ -129,6 +184,7 @@ device, dtype = DeviceSpec(device="auto").resolve()
|
|
|
129
184
|
|
|
130
185
|
## References
|
|
131
186
|
- [gpuRIR](https://github.com/DavidDiazGuerra/gpuRIR)
|
|
187
|
+
- [Cross3D](https://github.com/DavidDiazGuerra/Cross3D)
|
|
132
188
|
- [pyroomacoustics](https://github.com/LCAV/pyroomacoustics)
|
|
133
189
|
- [das-generator](https://github.com/ehabets/das-generator)
|
|
134
190
|
- [rir-generator](https://github.com/audiolabs/rir-generator)
|
|
@@ -211,3 +267,5 @@ y = DynamicConvolver(mode="trajectory").convolve(signal, rirs)
|
|
|
211
267
|
- FDTD backend: implement `FDTDSimulator` with configurable grid resolution and boundary conditions.
|
|
212
268
|
- Dataset expansion: add additional dataset integrations beyond CMU ARCTIC (see `TemplateDataset`).
|
|
213
269
|
- Enhanced acoustics: frequency-dependent absorption and more advanced diffuse tail models.
|
|
270
|
+
- Add microphone and source directivity models similar to gpuRIR/pyroomacoustics.
|
|
271
|
+
- Add regression tests comparing generated RIRs against gpuRIR outputs.
|
|
@@ -1,15 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: torchrir
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Add your description here
|
|
5
|
-
Requires-Python: >=3.10
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
License-File: LICENSE
|
|
8
|
-
License-File: NOTICE
|
|
9
|
-
Requires-Dist: numpy>=2.2.6
|
|
10
|
-
Requires-Dist: torch>=2.10.0
|
|
11
|
-
Dynamic: license-file
|
|
12
|
-
|
|
13
1
|
# TorchRIR
|
|
14
2
|
|
|
15
3
|
PyTorch-based room impulse response (RIR) simulation toolkit focused on a clean, modern API with GPU support.
|
|
@@ -18,6 +6,22 @@ This project has been substantially assisted by AI using Codex.
|
|
|
18
6
|
## License
|
|
19
7
|
Apache-2.0. See `LICENSE` and `NOTICE`.
|
|
20
8
|
|
|
9
|
+
## Installation
|
|
10
|
+
```bash
|
|
11
|
+
pip install torchrir
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Current Capabilities
|
|
15
|
+
- ISM-based static and dynamic RIR simulation (2D/3D shoebox rooms).
|
|
16
|
+
- Directivity patterns: `omni`, `cardioid`, `hypercardioid`, `subcardioid`, `bidir` with orientation handling.
|
|
17
|
+
- Acoustic parameters: `beta` or `t60` (Sabine), optional diffuse tail via `tdiff`.
|
|
18
|
+
- Dynamic convolution via `DynamicConvolver` (`trajectory` or `hop` modes).
|
|
19
|
+
- GPU acceleration for ISM accumulation (CUDA/MPS; MPS disables LUT).
|
|
20
|
+
- Dataset utilities with CMU ARCTIC support and example pipelines.
|
|
21
|
+
- Plotting utilities for static and dynamic scenes.
|
|
22
|
+
- Metadata export helpers for time axis, DOA, and array attributes (JSON-ready).
|
|
23
|
+
- Unified CLI with JSON/YAML config and deterministic flag support.
|
|
24
|
+
|
|
21
25
|
## Example Usage
|
|
22
26
|
```bash
|
|
23
27
|
# CMU ARCTIC + static RIR (fixed sources/mics)
|
|
@@ -26,16 +30,22 @@ uv run python examples/static.py --plot
|
|
|
26
30
|
# Dynamic RIR demos
|
|
27
31
|
uv run python examples/dynamic_mic.py --plot
|
|
28
32
|
uv run python examples/dynamic_src.py --plot
|
|
33
|
+
uv run python examples/dynamic_mic.py --gif
|
|
34
|
+
uv run python examples/dynamic_src.py --gif
|
|
29
35
|
|
|
30
36
|
# Unified CLI
|
|
31
37
|
uv run python examples/cli.py --mode static --plot
|
|
32
38
|
uv run python examples/cli.py --mode dynamic_mic --plot
|
|
33
39
|
uv run python examples/cli.py --mode dynamic_src --plot
|
|
40
|
+
uv run python examples/cli.py --mode dynamic_mic --gif
|
|
41
|
+
uv run python examples/dynamic_mic.py --gif --gif-fps 12
|
|
34
42
|
|
|
35
43
|
# Config + deterministic
|
|
36
44
|
uv run python examples/cli.py --mode static --deterministic --seed 123 --config-out outputs/cli.json
|
|
37
45
|
uv run python examples/cli.py --config-in outputs/cli.json
|
|
38
46
|
```
|
|
47
|
+
GIF FPS is auto-derived from signal duration and RIR steps unless overridden with `--gif-fps`.
|
|
48
|
+
For 3D rooms, an additional `*_3d.gif` is saved.
|
|
39
49
|
YAML configs are supported when `PyYAML` is installed.
|
|
40
50
|
```bash
|
|
41
51
|
# YAML config
|
|
@@ -43,6 +53,24 @@ uv run python examples/cli.py --mode static --config-out outputs/cli.yaml
|
|
|
43
53
|
uv run python examples/cli.py --config-in outputs/cli.yaml
|
|
44
54
|
```
|
|
45
55
|
`examples/cli_example.yaml` provides a ready-to-use template.
|
|
56
|
+
Examples also save `*_metadata.json` alongside audio outputs.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from torchrir import DynamicConvolver, MicrophoneArray, Room, Source, simulate_rir
|
|
60
|
+
|
|
61
|
+
room = Room.shoebox(size=[6.0, 4.0, 3.0], fs=16000, beta=[0.9] * 6)
|
|
62
|
+
sources = Source.positions([[1.0, 2.0, 1.5]])
|
|
63
|
+
mics = MicrophoneArray.positions([[2.0, 2.0, 1.5]])
|
|
64
|
+
|
|
65
|
+
rir = simulate_rir(
|
|
66
|
+
room=room,
|
|
67
|
+
sources=sources,
|
|
68
|
+
mics=mics,
|
|
69
|
+
max_order=6,
|
|
70
|
+
tmax=0.3,
|
|
71
|
+
device="auto",
|
|
72
|
+
)
|
|
73
|
+
```
|
|
46
74
|
|
|
47
75
|
```python
|
|
48
76
|
from torchrir import DynamicConvolver
|
|
@@ -55,6 +83,20 @@ y = DynamicConvolver(mode="hop", hop=1024).convolve(signal, rirs)
|
|
|
55
83
|
```
|
|
56
84
|
Dynamic convolution is exposed via `DynamicConvolver` only (no legacy function wrappers).
|
|
57
85
|
|
|
86
|
+
## Limitations and Potential Errors
|
|
87
|
+
- Ray tracing and FDTD simulators are placeholders and raise `NotImplementedError`.
|
|
88
|
+
- `TemplateDataset` methods are not implemented and will raise `NotImplementedError`.
|
|
89
|
+
- `simulate_rir`/`simulate_dynamic_rir` require `max_order` (or `SimulationConfig.max_order`) and either `nsample` or `tmax`.
|
|
90
|
+
- Non-`omni` directivity requires orientation; mismatched shapes raise `ValueError`.
|
|
91
|
+
- `beta` must have 4 (2D) or 6 (3D) elements; invalid sizes raise `ValueError`.
|
|
92
|
+
- `simulate_dynamic_rir` requires `src_traj` and `mic_traj` to have matching time steps.
|
|
93
|
+
- Dynamic simulation currently loops per time step; very long trajectories can be slow.
|
|
94
|
+
- MPS disables the sinc LUT path (falls back to direct sinc), which can be slower and slightly different numerically.
|
|
95
|
+
- Deterministic mode is best-effort; some backends may still be non-deterministic.
|
|
96
|
+
- YAML configs require `PyYAML`; otherwise a `ModuleNotFoundError` is raised.
|
|
97
|
+
- CMU ARCTIC downloads require network access.
|
|
98
|
+
- GIF animation output requires Pillow (via matplotlib animation writer).
|
|
99
|
+
|
|
58
100
|
### Dataset-agnostic utilities
|
|
59
101
|
```python
|
|
60
102
|
from torchrir import (
|
|
@@ -129,6 +171,7 @@ device, dtype = DeviceSpec(device="auto").resolve()
|
|
|
129
171
|
|
|
130
172
|
## References
|
|
131
173
|
- [gpuRIR](https://github.com/DavidDiazGuerra/gpuRIR)
|
|
174
|
+
- [Cross3D](https://github.com/DavidDiazGuerra/Cross3D)
|
|
132
175
|
- [pyroomacoustics](https://github.com/LCAV/pyroomacoustics)
|
|
133
176
|
- [das-generator](https://github.com/ehabets/das-generator)
|
|
134
177
|
- [rir-generator](https://github.com/audiolabs/rir-generator)
|
|
@@ -211,3 +254,5 @@ y = DynamicConvolver(mode="trajectory").convolve(signal, rirs)
|
|
|
211
254
|
- FDTD backend: implement `FDTDSimulator` with configurable grid resolution and boundary conditions.
|
|
212
255
|
- Dataset expansion: add additional dataset integrations beyond CMU ARCTIC (see `TemplateDataset`).
|
|
213
256
|
- Enhanced acoustics: frequency-dependent absorption and more advanced diffuse tail models.
|
|
257
|
+
- Add microphone and source directivity models similar to gpuRIR/pyroomacoustics.
|
|
258
|
+
- Add regression tests comparing generated RIRs against gpuRIR outputs.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "torchrir"
|
|
3
|
+
version = "0.1.2"
|
|
4
|
+
description = "PyTorch-based room impulse response (RIR) simulation toolkit for static and dynamic scenes."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"numpy>=2.2.6",
|
|
9
|
+
"torch>=2.10.0",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
[project.urls]
|
|
13
|
+
Repository = "https://github.com/taishi-n/torchrir"
|
|
14
|
+
|
|
15
|
+
[dependency-groups]
|
|
16
|
+
dev = [
|
|
17
|
+
"git-cliff>=2.10.1",
|
|
18
|
+
"matplotlib>=3.10.8",
|
|
19
|
+
"pillow>=11.2.1",
|
|
20
|
+
"pyroomacoustics>=0.9.0",
|
|
21
|
+
"pytest>=9.0.2",
|
|
22
|
+
"soundfile>=0.13.1",
|
|
23
|
+
"sphinx>=7.0,<8.2.3",
|
|
24
|
+
"sphinx-rtd-theme>=2.0.0",
|
|
25
|
+
"myst-parser>=2.0,<4.0",
|
|
26
|
+
]
|
|
@@ -4,6 +4,8 @@ from .config import SimulationConfig, default_config
|
|
|
4
4
|
from .core import simulate_dynamic_rir, simulate_rir
|
|
5
5
|
from .dynamic import DynamicConvolver
|
|
6
6
|
from .logging_utils import LoggingConfig, get_logger, setup_logging
|
|
7
|
+
from .animation import animate_scene_gif
|
|
8
|
+
from .metadata import build_metadata, save_metadata_json
|
|
7
9
|
from .plotting import plot_scene_dynamic, plot_scene_static
|
|
8
10
|
from .plotting_utils import plot_scene_and_save
|
|
9
11
|
from .room import MicrophoneArray, Room, Source
|
|
@@ -61,6 +63,8 @@ __all__ = [
|
|
|
61
63
|
"get_logger",
|
|
62
64
|
"list_cmu_arctic_speakers",
|
|
63
65
|
"LoggingConfig",
|
|
66
|
+
"animate_scene_gif",
|
|
67
|
+
"build_metadata",
|
|
64
68
|
"resolve_device",
|
|
65
69
|
"SentenceLike",
|
|
66
70
|
"load_dataset_sources",
|
|
@@ -75,6 +79,7 @@ __all__ = [
|
|
|
75
79
|
"plot_scene_and_save",
|
|
76
80
|
"plot_scene_static",
|
|
77
81
|
"save_wav",
|
|
82
|
+
"save_metadata_json",
|
|
78
83
|
"Scene",
|
|
79
84
|
"setup_logging",
|
|
80
85
|
"SimulationConfig",
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Animation helpers for dynamic scenes."""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional, Sequence
|
|
7
|
+
|
|
8
|
+
import torch
|
|
9
|
+
|
|
10
|
+
from .plotting_utils import _positions_to_cpu, _to_cpu, _traj_steps, _trajectory_to_cpu
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def animate_scene_gif(
|
|
14
|
+
*,
|
|
15
|
+
out_path: Path,
|
|
16
|
+
room: Sequence[float] | torch.Tensor,
|
|
17
|
+
sources: object | torch.Tensor | Sequence,
|
|
18
|
+
mics: object | torch.Tensor | Sequence,
|
|
19
|
+
src_traj: Optional[torch.Tensor | Sequence] = None,
|
|
20
|
+
mic_traj: Optional[torch.Tensor | Sequence] = None,
|
|
21
|
+
step: int = 1,
|
|
22
|
+
fps: Optional[float] = None,
|
|
23
|
+
signal_len: Optional[int] = None,
|
|
24
|
+
fs: Optional[float] = None,
|
|
25
|
+
duration_s: Optional[float] = None,
|
|
26
|
+
plot_2d: bool = True,
|
|
27
|
+
plot_3d: bool = False,
|
|
28
|
+
) -> Path:
|
|
29
|
+
"""Render a GIF showing source/mic trajectories.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
out_path: Destination GIF path.
|
|
33
|
+
room: Room size tensor or sequence.
|
|
34
|
+
sources: Source positions or Source-like object.
|
|
35
|
+
mics: Microphone positions or MicrophoneArray-like object.
|
|
36
|
+
src_traj: Optional source trajectory (T, n_src, dim).
|
|
37
|
+
mic_traj: Optional mic trajectory (T, n_mic, dim).
|
|
38
|
+
step: Subsampling step for trajectories.
|
|
39
|
+
fps: Frames per second for the GIF (auto if None).
|
|
40
|
+
signal_len: Optional signal length (samples) to infer elapsed time.
|
|
41
|
+
fs: Sample rate used with signal_len.
|
|
42
|
+
duration_s: Optional total duration in seconds (overrides signal_len/fs).
|
|
43
|
+
plot_2d: Use 2D projection if True.
|
|
44
|
+
plot_3d: Use 3D projection if True and dim == 3.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
The output path.
|
|
48
|
+
|
|
49
|
+
Example:
|
|
50
|
+
>>> animate_scene_gif(
|
|
51
|
+
... out_path=Path("outputs/scene.gif"),
|
|
52
|
+
... room=[6.0, 4.0, 3.0],
|
|
53
|
+
... sources=[[1.0, 2.0, 1.5]],
|
|
54
|
+
... mics=[[2.0, 2.0, 1.5]],
|
|
55
|
+
... src_traj=src_traj,
|
|
56
|
+
... mic_traj=mic_traj,
|
|
57
|
+
... signal_len=16000,
|
|
58
|
+
... fs=16000,
|
|
59
|
+
... )
|
|
60
|
+
"""
|
|
61
|
+
import matplotlib.pyplot as plt
|
|
62
|
+
from matplotlib import animation
|
|
63
|
+
|
|
64
|
+
out_path = Path(out_path)
|
|
65
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
|
66
|
+
|
|
67
|
+
room_size = _to_cpu(room)
|
|
68
|
+
src_pos = _positions_to_cpu(sources)
|
|
69
|
+
mic_pos = _positions_to_cpu(mics)
|
|
70
|
+
dim = int(room_size.numel())
|
|
71
|
+
view_dim = 3 if (plot_3d and dim == 3) else 2
|
|
72
|
+
view_room = room_size[:view_dim]
|
|
73
|
+
view_src = src_pos[:, :view_dim]
|
|
74
|
+
view_mic = mic_pos[:, :view_dim]
|
|
75
|
+
|
|
76
|
+
if src_traj is None and mic_traj is None:
|
|
77
|
+
raise ValueError("at least one trajectory is required for animation")
|
|
78
|
+
steps = _traj_steps(src_traj, mic_traj)
|
|
79
|
+
src_traj = _trajectory_to_cpu(src_traj, src_pos, steps)
|
|
80
|
+
mic_traj = _trajectory_to_cpu(mic_traj, mic_pos, steps)
|
|
81
|
+
view_src_traj = src_traj[:, :, :view_dim]
|
|
82
|
+
view_mic_traj = mic_traj[:, :, :view_dim]
|
|
83
|
+
|
|
84
|
+
if view_dim == 3:
|
|
85
|
+
fig = plt.figure()
|
|
86
|
+
ax = fig.add_subplot(111, projection="3d")
|
|
87
|
+
ax.set_xlim(0, view_room[0].item())
|
|
88
|
+
ax.set_ylim(0, view_room[1].item())
|
|
89
|
+
ax.set_zlim(0, view_room[2].item())
|
|
90
|
+
ax.set_xlabel("x")
|
|
91
|
+
ax.set_ylabel("y")
|
|
92
|
+
ax.set_zlabel("z")
|
|
93
|
+
else:
|
|
94
|
+
fig, ax = plt.subplots()
|
|
95
|
+
ax.set_xlim(0, view_room[0].item())
|
|
96
|
+
ax.set_ylim(0, view_room[1].item())
|
|
97
|
+
ax.set_aspect("equal", adjustable="box")
|
|
98
|
+
ax.set_xlabel("x")
|
|
99
|
+
ax.set_ylabel("y")
|
|
100
|
+
|
|
101
|
+
src_scatter = ax.scatter([], [], marker="^", color="tab:green", label="sources")
|
|
102
|
+
mic_scatter = ax.scatter([], [], marker="o", color="tab:orange", label="mics")
|
|
103
|
+
src_lines = []
|
|
104
|
+
mic_lines = []
|
|
105
|
+
for _ in range(view_src_traj.shape[1]):
|
|
106
|
+
if view_dim == 2:
|
|
107
|
+
line, = ax.plot([], [], color="tab:green", alpha=0.6)
|
|
108
|
+
else:
|
|
109
|
+
line, = ax.plot([], [], [], color="tab:green", alpha=0.6)
|
|
110
|
+
src_lines.append(line)
|
|
111
|
+
for _ in range(view_mic_traj.shape[1]):
|
|
112
|
+
if view_dim == 2:
|
|
113
|
+
line, = ax.plot([], [], color="tab:orange", alpha=0.6)
|
|
114
|
+
else:
|
|
115
|
+
line, = ax.plot([], [], [], color="tab:orange", alpha=0.6)
|
|
116
|
+
mic_lines.append(line)
|
|
117
|
+
|
|
118
|
+
ax.legend(loc="best")
|
|
119
|
+
|
|
120
|
+
if duration_s is None and signal_len is not None and fs is not None:
|
|
121
|
+
duration_s = float(signal_len) / float(fs)
|
|
122
|
+
|
|
123
|
+
def _frame(i: int):
|
|
124
|
+
idx = min(i * step, view_src_traj.shape[0] - 1)
|
|
125
|
+
src_frame = view_src_traj[: idx + 1]
|
|
126
|
+
mic_frame = view_mic_traj[: idx + 1]
|
|
127
|
+
src_pos_frame = view_src_traj[idx]
|
|
128
|
+
mic_pos_frame = view_mic_traj[idx]
|
|
129
|
+
|
|
130
|
+
if view_dim == 2:
|
|
131
|
+
src_scatter.set_offsets(src_pos_frame)
|
|
132
|
+
mic_scatter.set_offsets(mic_pos_frame)
|
|
133
|
+
for s_idx, line in enumerate(src_lines):
|
|
134
|
+
xy = src_frame[:, s_idx, :]
|
|
135
|
+
line.set_data(xy[:, 0], xy[:, 1])
|
|
136
|
+
for m_idx, line in enumerate(mic_lines):
|
|
137
|
+
xy = mic_frame[:, m_idx, :]
|
|
138
|
+
line.set_data(xy[:, 0], xy[:, 1])
|
|
139
|
+
else:
|
|
140
|
+
src_scatter._offsets3d = (
|
|
141
|
+
src_pos_frame[:, 0],
|
|
142
|
+
src_pos_frame[:, 1],
|
|
143
|
+
src_pos_frame[:, 2],
|
|
144
|
+
)
|
|
145
|
+
mic_scatter._offsets3d = (
|
|
146
|
+
mic_pos_frame[:, 0],
|
|
147
|
+
mic_pos_frame[:, 1],
|
|
148
|
+
mic_pos_frame[:, 2],
|
|
149
|
+
)
|
|
150
|
+
for s_idx, line in enumerate(src_lines):
|
|
151
|
+
xyz = src_frame[:, s_idx, :]
|
|
152
|
+
line.set_data(xyz[:, 0], xyz[:, 1])
|
|
153
|
+
line.set_3d_properties(xyz[:, 2])
|
|
154
|
+
for m_idx, line in enumerate(mic_lines):
|
|
155
|
+
xyz = mic_frame[:, m_idx, :]
|
|
156
|
+
line.set_data(xyz[:, 0], xyz[:, 1])
|
|
157
|
+
line.set_3d_properties(xyz[:, 2])
|
|
158
|
+
if duration_s is not None and steps > 1:
|
|
159
|
+
t = (idx / (steps - 1)) * duration_s
|
|
160
|
+
ax.set_title(f"t = {t:.2f} s")
|
|
161
|
+
return [src_scatter, mic_scatter, *src_lines, *mic_lines]
|
|
162
|
+
|
|
163
|
+
frames = max(1, (view_src_traj.shape[0] + step - 1) // step)
|
|
164
|
+
if fps is None or fps <= 0:
|
|
165
|
+
if duration_s is not None and duration_s > 0:
|
|
166
|
+
fps = frames / duration_s
|
|
167
|
+
else:
|
|
168
|
+
fps = 6.0
|
|
169
|
+
anim = animation.FuncAnimation(fig, _frame, frames=frames, interval=1000 / fps, blit=False)
|
|
170
|
+
anim.save(out_path, writer="pillow", fps=fps)
|
|
171
|
+
plt.close(fig)
|
|
172
|
+
return out_path
|
|
@@ -10,7 +10,12 @@ import torch
|
|
|
10
10
|
|
|
11
11
|
@dataclass(frozen=True)
|
|
12
12
|
class SimulationConfig:
|
|
13
|
-
"""Configuration values for RIR simulation and convolution.
|
|
13
|
+
"""Configuration values for RIR simulation and convolution.
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> cfg = SimulationConfig(max_order=6, tmax=0.3, device="auto")
|
|
17
|
+
>>> cfg.validate()
|
|
18
|
+
"""
|
|
14
19
|
|
|
15
20
|
fs: Optional[float] = None
|
|
16
21
|
max_order: Optional[int] = None
|
|
@@ -53,7 +58,11 @@ class SimulationConfig:
|
|
|
53
58
|
|
|
54
59
|
|
|
55
60
|
def default_config() -> SimulationConfig:
|
|
56
|
-
"""Return the default simulation configuration.
|
|
61
|
+
"""Return the default simulation configuration.
|
|
62
|
+
|
|
63
|
+
Example:
|
|
64
|
+
>>> cfg = default_config()
|
|
65
|
+
"""
|
|
57
66
|
cfg = SimulationConfig()
|
|
58
67
|
cfg.validate()
|
|
59
68
|
return cfg
|
|
@@ -58,6 +58,18 @@ def simulate_rir(
|
|
|
58
58
|
|
|
59
59
|
Returns:
|
|
60
60
|
Tensor of shape (n_src, n_mic, nsample).
|
|
61
|
+
|
|
62
|
+
Example:
|
|
63
|
+
>>> room = Room.shoebox(size=[6.0, 4.0, 3.0], fs=16000, beta=[0.9] * 6)
|
|
64
|
+
>>> sources = Source.positions([[1.0, 2.0, 1.5]])
|
|
65
|
+
>>> mics = MicrophoneArray.positions([[2.0, 2.0, 1.5]])
|
|
66
|
+
>>> rir = simulate_rir(
|
|
67
|
+
... room=room,
|
|
68
|
+
... sources=sources,
|
|
69
|
+
... mics=mics,
|
|
70
|
+
... max_order=6,
|
|
71
|
+
... tmax=0.3,
|
|
72
|
+
... )
|
|
61
73
|
"""
|
|
62
74
|
cfg = config or default_config()
|
|
63
75
|
cfg.validate()
|
|
@@ -208,6 +220,24 @@ def simulate_dynamic_rir(
|
|
|
208
220
|
|
|
209
221
|
Returns:
|
|
210
222
|
Tensor of shape (T, n_src, n_mic, nsample).
|
|
223
|
+
|
|
224
|
+
Example:
|
|
225
|
+
>>> room = Room.shoebox(size=[6.0, 4.0, 3.0], fs=16000, beta=[0.9] * 6)
|
|
226
|
+
>>> from torchrir import linear_trajectory
|
|
227
|
+
>>> src_traj = torch.stack(
|
|
228
|
+
... [linear_trajectory(torch.tensor([1.0, 2.0, 1.5]),
|
|
229
|
+
... torch.tensor([4.0, 2.0, 1.5]), 8)],
|
|
230
|
+
... dim=1,
|
|
231
|
+
... )
|
|
232
|
+
>>> mic_pos = torch.tensor([[2.0, 2.0, 1.5]])
|
|
233
|
+
>>> mic_traj = mic_pos.unsqueeze(0).repeat(8, 1, 1)
|
|
234
|
+
>>> rirs = simulate_dynamic_rir(
|
|
235
|
+
... room=room,
|
|
236
|
+
... src_traj=src_traj,
|
|
237
|
+
... mic_traj=mic_traj,
|
|
238
|
+
... max_order=4,
|
|
239
|
+
... tmax=0.3,
|
|
240
|
+
... )
|
|
211
241
|
"""
|
|
212
242
|
cfg = config or default_config()
|
|
213
243
|
cfg.validate()
|
|
@@ -49,6 +49,13 @@ class CmuArcticSentence:
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
class CmuArcticDataset:
|
|
52
|
+
"""CMU ARCTIC dataset loader.
|
|
53
|
+
|
|
54
|
+
Example:
|
|
55
|
+
>>> dataset = CmuArcticDataset(Path("datasets/cmu_arctic"), speaker="bdl", download=True)
|
|
56
|
+
>>> audio, fs = dataset.load_wav("arctic_a0001")
|
|
57
|
+
"""
|
|
58
|
+
|
|
52
59
|
def __init__(self, root: Path, speaker: str = "bdl", download: bool = False) -> None:
|
|
53
60
|
"""Initialize a CMU ARCTIC dataset handle.
|
|
54
61
|
|
|
@@ -182,7 +189,11 @@ def _parse_text_line(line: str) -> Tuple[str, str]:
|
|
|
182
189
|
|
|
183
190
|
|
|
184
191
|
def load_wav_mono(path: Path) -> Tuple[torch.Tensor, int]:
|
|
185
|
-
"""Load a wav file and return mono audio and sample rate.
|
|
192
|
+
"""Load a wav file and return mono audio and sample rate.
|
|
193
|
+
|
|
194
|
+
Example:
|
|
195
|
+
>>> audio, fs = load_wav_mono(Path("datasets/cmu_arctic/ARCTIC/.../wav/arctic_a0001.wav"))
|
|
196
|
+
"""
|
|
186
197
|
import soundfile as sf
|
|
187
198
|
|
|
188
199
|
audio, sample_rate = sf.read(str(path), dtype="float32", always_2d=True)
|
|
@@ -195,7 +206,11 @@ def load_wav_mono(path: Path) -> Tuple[torch.Tensor, int]:
|
|
|
195
206
|
|
|
196
207
|
|
|
197
208
|
def save_wav(path: Path, audio: torch.Tensor, sample_rate: int) -> None:
|
|
198
|
-
"""Save a mono or multi-channel wav to disk.
|
|
209
|
+
"""Save a mono or multi-channel wav to disk.
|
|
210
|
+
|
|
211
|
+
Example:
|
|
212
|
+
>>> save_wav(Path("outputs/example.wav"), audio, sample_rate)
|
|
213
|
+
"""
|
|
199
214
|
import soundfile as sf
|
|
200
215
|
|
|
201
216
|
audio = audio.detach().cpu().clamp(-1.0, 1.0).to(torch.float32)
|
|
@@ -11,7 +11,12 @@ from .base import BaseDataset, SentenceLike
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def choose_speakers(dataset: BaseDataset, num_sources: int, rng: random.Random) -> List[str]:
|
|
14
|
-
"""Select unique speakers for the requested number of sources.
|
|
14
|
+
"""Select unique speakers for the requested number of sources.
|
|
15
|
+
|
|
16
|
+
Example:
|
|
17
|
+
>>> rng = random.Random(0)
|
|
18
|
+
>>> speakers = choose_speakers(dataset, num_sources=2, rng=rng)
|
|
19
|
+
"""
|
|
15
20
|
speakers = dataset.list_speakers()
|
|
16
21
|
if not speakers:
|
|
17
22
|
raise RuntimeError("no speakers available")
|
|
@@ -27,7 +32,20 @@ def load_dataset_sources(
|
|
|
27
32
|
duration_s: float,
|
|
28
33
|
rng: random.Random,
|
|
29
34
|
) -> Tuple[torch.Tensor, int, List[Tuple[str, List[str]]]]:
|
|
30
|
-
"""Load and concatenate utterances for each speaker into fixed-length signals.
|
|
35
|
+
"""Load and concatenate utterances for each speaker into fixed-length signals.
|
|
36
|
+
|
|
37
|
+
Example:
|
|
38
|
+
>>> from pathlib import Path
|
|
39
|
+
>>> from torchrir import CmuArcticDataset
|
|
40
|
+
>>> rng = random.Random(0)
|
|
41
|
+
>>> root = Path("datasets/cmu_arctic")
|
|
42
|
+
>>> signals, fs, info = load_dataset_sources(
|
|
43
|
+
... dataset_factory=lambda spk: CmuArcticDataset(root, speaker=spk, download=True),
|
|
44
|
+
... num_sources=2,
|
|
45
|
+
... duration_s=10.0,
|
|
46
|
+
... rng=rng,
|
|
47
|
+
... )
|
|
48
|
+
"""
|
|
31
49
|
dataset0 = dataset_factory(None)
|
|
32
50
|
speakers = choose_speakers(dataset0, num_sources, rng)
|
|
33
51
|
signals: List[torch.Tensor] = []
|
|
@@ -17,7 +17,12 @@ from .signal import _ensure_dynamic_rirs, _ensure_signal
|
|
|
17
17
|
|
|
18
18
|
@dataclass(frozen=True)
|
|
19
19
|
class DynamicConvolver:
|
|
20
|
-
"""Convolver for time-varying RIRs.
|
|
20
|
+
"""Convolver for time-varying RIRs.
|
|
21
|
+
|
|
22
|
+
Example:
|
|
23
|
+
>>> convolver = DynamicConvolver(mode="trajectory")
|
|
24
|
+
>>> y = convolver.convolve(signal, rirs)
|
|
25
|
+
"""
|
|
21
26
|
|
|
22
27
|
mode: str = "trajectory"
|
|
23
28
|
hop: Optional[int] = None
|
|
@@ -28,7 +33,11 @@ class DynamicConvolver:
|
|
|
28
33
|
return self.convolve(signal, rirs)
|
|
29
34
|
|
|
30
35
|
def convolve(self, signal: Tensor, rirs: Tensor) -> Tensor:
|
|
31
|
-
"""Convolve signals with time-varying RIRs.
|
|
36
|
+
"""Convolve signals with time-varying RIRs.
|
|
37
|
+
|
|
38
|
+
Example:
|
|
39
|
+
>>> y = DynamicConvolver(mode="hop", hop=1024).convolve(signal, rirs)
|
|
40
|
+
"""
|
|
32
41
|
if self.mode not in ("trajectory", "hop"):
|
|
33
42
|
raise ValueError("mode must be 'trajectory' or 'hop'")
|
|
34
43
|
if self.mode == "hop":
|
|
@@ -9,7 +9,12 @@ from typing import Optional
|
|
|
9
9
|
|
|
10
10
|
@dataclass(frozen=True)
|
|
11
11
|
class LoggingConfig:
|
|
12
|
-
"""Configuration for torchrir logging.
|
|
12
|
+
"""Configuration for torchrir logging.
|
|
13
|
+
|
|
14
|
+
Example:
|
|
15
|
+
>>> config = LoggingConfig(level="INFO")
|
|
16
|
+
>>> logger = setup_logging(config)
|
|
17
|
+
"""
|
|
13
18
|
|
|
14
19
|
level: str | int = "INFO"
|
|
15
20
|
format: str = "%(levelname)s:%(name)s:%(message)s"
|
|
@@ -33,7 +38,12 @@ class LoggingConfig:
|
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def setup_logging(config: LoggingConfig, *, name: str = "torchrir") -> logging.Logger:
|
|
36
|
-
"""Configure and return the base torchrir logger.
|
|
41
|
+
"""Configure and return the base torchrir logger.
|
|
42
|
+
|
|
43
|
+
Example:
|
|
44
|
+
>>> logger = setup_logging(LoggingConfig(level="DEBUG"))
|
|
45
|
+
>>> logger.info("ready")
|
|
46
|
+
"""
|
|
37
47
|
logger = logging.getLogger(name)
|
|
38
48
|
level = config.resolve_level()
|
|
39
49
|
logger.setLevel(level)
|
|
@@ -47,7 +57,11 @@ def setup_logging(config: LoggingConfig, *, name: str = "torchrir") -> logging.L
|
|
|
47
57
|
|
|
48
58
|
|
|
49
59
|
def get_logger(name: Optional[str] = None) -> logging.Logger:
|
|
50
|
-
"""Return a torchrir logger, namespaced under the torchrir root.
|
|
60
|
+
"""Return a torchrir logger, namespaced under the torchrir root.
|
|
61
|
+
|
|
62
|
+
Example:
|
|
63
|
+
>>> logger = get_logger("examples.static")
|
|
64
|
+
"""
|
|
51
65
|
if not name:
|
|
52
66
|
return logging.getLogger("torchrir")
|
|
53
67
|
if name.startswith("torchrir"):
|