sounddiff 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sounddiff/__init__.py +3 -0
- sounddiff/__main__.py +5 -0
- sounddiff/cli.py +79 -0
- sounddiff/core.py +66 -0
- sounddiff/detection.py +164 -0
- sounddiff/formats.py +100 -0
- sounddiff/loudness.py +98 -0
- sounddiff/report.py +311 -0
- sounddiff/spectral.py +94 -0
- sounddiff/temporal.py +166 -0
- sounddiff/types.py +181 -0
- sounddiff-0.1.0.dist-info/METADATA +169 -0
- sounddiff-0.1.0.dist-info/RECORD +16 -0
- sounddiff-0.1.0.dist-info/WHEEL +4 -0
- sounddiff-0.1.0.dist-info/entry_points.txt +2 -0
- sounddiff-0.1.0.dist-info/licenses/LICENSE +21 -0
sounddiff/__init__.py
ADDED
sounddiff/__main__.py
ADDED
sounddiff/cli.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""CLI entry point for sounddiff."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from sounddiff import __version__
|
|
10
|
+
from sounddiff.core import diff
|
|
11
|
+
from sounddiff.report import render
|
|
12
|
+
from sounddiff.types import OutputFormat
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command()
@click.argument("file_a", type=click.Path(exists=True))
@click.argument("file_b", type=click.Path(exists=True))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["terminal", "json", "html"]),
    default="terminal",
    help="Output format.",
)
@click.option(
    "-o",
    "--output",
    "output_path",
    type=click.Path(),
    default=None,
    help="Write output to a file (useful with --format html).",
)
@click.option(
    "--verbose",
    is_flag=True,
    default=False,
    help="Show additional detail.",
)
@click.option(
    "--no-color",
    is_flag=True,
    default=False,
    help="Disable colored terminal output.",
)
@click.version_option(version=__version__, prog_name="sounddiff")
def main(
    file_a: str,
    file_b: str,
    output_format: str,
    output_path: str | None,
    verbose: bool,
    no_color: bool,
) -> None:
    """Compare two audio files and report what changed.

    sounddiff FILE_A FILE_B

    Compares FILE_A (reference) against FILE_B (comparison) and reports
    differences in loudness, spectral content, timing, and potential issues.
    """
    # NOTE(review): `verbose` is accepted but never used in this function;
    # confirm whether it should be forwarded to render() or dropped.
    try:
        result = diff(file_a, file_b)
    except (FileNotFoundError, ValueError, RuntimeError) as e:
        # All three are expected user-facing failures (missing file,
        # unsupported format, unreadable file): report uniformly and exit 1.
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)

    fmt = OutputFormat(output_format)
    output = render(result, fmt, output_path, no_color=no_color)

    if output_path and fmt == OutputFormat.HTML:
        # The HTML report was written to disk by render(); just confirm.
        click.echo(f"Report written to {output_path}")
    else:
        click.echo(output)
|
sounddiff/core.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Core orchestration: run the full comparison pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from sounddiff.detection import compare_detection
|
|
8
|
+
from sounddiff.formats import load_audio
|
|
9
|
+
from sounddiff.loudness import compare_loudness
|
|
10
|
+
from sounddiff.spectral import compare_spectral
|
|
11
|
+
from sounddiff.temporal import compare_temporal
|
|
12
|
+
from sounddiff.types import DiffResult, MetadataComparison
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def diff(
    path_a: str | Path,
    path_b: str | Path,
) -> DiffResult:
    """Compare two audio files and return a structured diff.

    Args:
        path_a: Path to the first (reference) audio file.
        path_b: Path to the second (comparison) audio file.

    Returns:
        DiffResult containing all analysis results.

    Raises:
        FileNotFoundError: If either file does not exist.
        ValueError: If either file format is unsupported.
    """
    signal_a, info_a = load_audio(path_a)
    signal_b, info_b = load_audio(path_b)

    meta_cmp = MetadataComparison(file_a=info_a, file_b=info_b)

    # Collect caveats about property mismatches up front; downstream
    # comparisons still run but with reduced fidelity.
    caveats: list[str] = []
    if not meta_cmp.same_sample_rate:
        caveats.append(
            f"Sample rate mismatch: {info_a.sample_rate} Hz vs {info_b.sample_rate} Hz. "
            "Comparison accuracy may be reduced."
        )
    if not meta_cmp.same_channels:
        caveats.append(
            f"Channel count mismatch: {info_a.channels} vs {info_b.channels}. "
            "Comparison will use mixed-to-mono signals where needed."
        )

    return DiffResult(
        metadata=meta_cmp,
        loudness=compare_loudness(
            signal_a, signal_b, info_a.sample_rate, info_b.sample_rate
        ),
        spectral=compare_spectral(
            signal_a, signal_b, info_a.sample_rate, info_b.sample_rate
        ),
        temporal=compare_temporal(signal_a, signal_b, info_a.sample_rate),
        detection=compare_detection(
            signal_a,
            signal_b,
            info_a.sample_rate,
            info_b.sample_rate,
            Path(info_a.path).name,
            Path(info_b.path).name,
        ),
        warnings=caveats,
    )
|
sounddiff/detection.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Detection: clipping and silence analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from sounddiff.types import ClipEvent, DetectionResult, SilenceRegion
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def detect_clipping(
|
|
11
|
+
data: np.ndarray,
|
|
12
|
+
sample_rate: int,
|
|
13
|
+
file_label: str,
|
|
14
|
+
threshold: float = 0.99,
|
|
15
|
+
min_consecutive: int = 2,
|
|
16
|
+
) -> list[ClipEvent]:
|
|
17
|
+
"""Detect clipping events in an audio signal.
|
|
18
|
+
|
|
19
|
+
Clipping is defined as consecutive samples at or above the threshold.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
data: Audio signal, shape (frames, channels).
|
|
23
|
+
sample_rate: Sample rate in Hz.
|
|
24
|
+
file_label: Label for this file (used in output).
|
|
25
|
+
threshold: Amplitude threshold for clipping detection.
|
|
26
|
+
min_consecutive: Minimum consecutive samples to count as clipping.
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
List of ClipEvent objects.
|
|
30
|
+
"""
|
|
31
|
+
clips: list[ClipEvent] = []
|
|
32
|
+
|
|
33
|
+
for channel in range(data.shape[1]):
|
|
34
|
+
channel_data = np.abs(data[:, channel])
|
|
35
|
+
is_clipping = channel_data >= threshold
|
|
36
|
+
|
|
37
|
+
# Find runs of consecutive clipping samples
|
|
38
|
+
changes = np.diff(is_clipping.astype(int))
|
|
39
|
+
starts = np.where(changes == 1)[0] + 1
|
|
40
|
+
ends = np.where(changes == -1)[0] + 1
|
|
41
|
+
|
|
42
|
+
# Handle edge cases
|
|
43
|
+
if is_clipping[0]:
|
|
44
|
+
starts = np.concatenate([[0], starts])
|
|
45
|
+
if is_clipping[-1]:
|
|
46
|
+
ends = np.concatenate([ends, [len(channel_data)]])
|
|
47
|
+
|
|
48
|
+
for start, end in zip(starts, ends, strict=True):
|
|
49
|
+
count = end - start
|
|
50
|
+
if count >= min_consecutive:
|
|
51
|
+
timestamp = start / sample_rate
|
|
52
|
+
clips.append(
|
|
53
|
+
ClipEvent(
|
|
54
|
+
file_label=file_label,
|
|
55
|
+
timestamp=round(timestamp, 3),
|
|
56
|
+
channel=channel,
|
|
57
|
+
sample_count=int(count),
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
return clips
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def detect_silence(
|
|
65
|
+
data: np.ndarray,
|
|
66
|
+
sample_rate: int,
|
|
67
|
+
file_label: str,
|
|
68
|
+
threshold_db: float = -60.0,
|
|
69
|
+
min_duration: float = 0.1,
|
|
70
|
+
) -> list[SilenceRegion]:
|
|
71
|
+
"""Detect regions of silence in an audio signal.
|
|
72
|
+
|
|
73
|
+
Silence is defined as RMS energy below the threshold for at least min_duration.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
data: Audio signal, shape (frames, channels).
|
|
77
|
+
sample_rate: Sample rate in Hz.
|
|
78
|
+
file_label: Label for this file (used in output).
|
|
79
|
+
threshold_db: RMS threshold in dB for silence.
|
|
80
|
+
min_duration: Minimum duration in seconds for a silence region.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
List of SilenceRegion objects.
|
|
84
|
+
"""
|
|
85
|
+
# Mix to mono for silence detection
|
|
86
|
+
mono = np.mean(data, axis=1)
|
|
87
|
+
|
|
88
|
+
# Compute RMS in short windows
|
|
89
|
+
window_size = int(0.01 * sample_rate) # 10ms windows
|
|
90
|
+
if window_size == 0:
|
|
91
|
+
return []
|
|
92
|
+
|
|
93
|
+
threshold_linear = 10 ** (threshold_db / 20.0)
|
|
94
|
+
min_samples = int(min_duration * sample_rate)
|
|
95
|
+
|
|
96
|
+
# Compute per-window RMS
|
|
97
|
+
n_windows = len(mono) // window_size
|
|
98
|
+
if n_windows == 0:
|
|
99
|
+
return []
|
|
100
|
+
|
|
101
|
+
truncated = mono[: n_windows * window_size].reshape(n_windows, window_size)
|
|
102
|
+
rms_values = np.sqrt(np.mean(truncated**2, axis=1))
|
|
103
|
+
is_silent = rms_values < threshold_linear
|
|
104
|
+
|
|
105
|
+
# Find runs of silence
|
|
106
|
+
regions: list[SilenceRegion] = []
|
|
107
|
+
changes = np.diff(is_silent.astype(int))
|
|
108
|
+
starts = np.where(changes == 1)[0] + 1
|
|
109
|
+
ends = np.where(changes == -1)[0] + 1
|
|
110
|
+
|
|
111
|
+
if is_silent[0]:
|
|
112
|
+
starts = np.concatenate([[0], starts])
|
|
113
|
+
if is_silent[-1]:
|
|
114
|
+
ends = np.concatenate([ends, [n_windows]])
|
|
115
|
+
|
|
116
|
+
for start, end in zip(starts, ends, strict=True):
|
|
117
|
+
sample_start = start * window_size
|
|
118
|
+
sample_end = end * window_size
|
|
119
|
+
duration_samples = sample_end - sample_start
|
|
120
|
+
|
|
121
|
+
if duration_samples >= min_samples:
|
|
122
|
+
regions.append(
|
|
123
|
+
SilenceRegion(
|
|
124
|
+
file_label=file_label,
|
|
125
|
+
start_time=round(sample_start / sample_rate, 3),
|
|
126
|
+
end_time=round(sample_end / sample_rate, 3),
|
|
127
|
+
)
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
return regions
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def compare_detection(
    data_a: np.ndarray,
    data_b: np.ndarray,
    sample_rate_a: int,
    sample_rate_b: int,
    file_a_name: str,
    file_b_name: str,
) -> DetectionResult:
    """Run clipping and silence detection on both files.

    Args:
        data_a: First audio signal.
        data_b: Second audio signal.
        sample_rate_a: Sample rate of first signal.
        sample_rate_b: Sample rate of second signal.
        file_a_name: Display name for first file.
        file_b_name: Display name for second file.

    Returns:
        DetectionResult with clips and silence regions.
    """
    # Clip events from both files are merged into one list; each event
    # carries its file label, so the origin stays identifiable.
    return DetectionResult(
        clips=(
            detect_clipping(data_a, sample_rate_a, file_a_name)
            + detect_clipping(data_b, sample_rate_b, file_b_name)
        ),
        silence_regions_a=detect_silence(data_a, sample_rate_a, file_a_name),
        silence_regions_b=detect_silence(data_b, sample_rate_b, file_b_name),
    )
|
sounddiff/formats.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Audio file loading and format detection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np # noqa: TC002 (used at runtime in return type)
|
|
8
|
+
import soundfile as sf
|
|
9
|
+
|
|
10
|
+
from sounddiff.types import AudioMetadata
|
|
11
|
+
|
|
12
|
+
# Formats supported natively via libsndfile
|
|
13
|
+
NATIVE_FORMATS = {".wav", ".flac", ".ogg", ".aiff", ".aif"}
|
|
14
|
+
|
|
15
|
+
# Formats that require ffmpeg
|
|
16
|
+
FFMPEG_FORMATS = {".mp3", ".aac", ".m4a", ".wma", ".opus"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_audio(path: str | Path) -> tuple[np.ndarray, AudioMetadata]:
    """Load an audio file and return the signal and metadata.

    Args:
        path: Path to the audio file.

    Returns:
        Tuple of (audio signal as float64 ndarray, metadata).
        Signal is always 2D: (frames, channels). Mono files get shape (frames, 1).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the format is unsupported or requires ffmpeg.
        RuntimeError: If the file cannot be read.
    """
    audio_path = Path(path)

    if not audio_path.exists():
        raise FileNotFoundError(f"File not found: {audio_path}")

    # Format support is decided by extension before touching the decoder.
    ext = audio_path.suffix.lower()
    if ext in FFMPEG_FORMATS:
        raise ValueError(
            f"Format '{ext}' requires ffmpeg, which is not installed or not supported yet. "
            f"Supported formats without ffmpeg: {', '.join(sorted(NATIVE_FORMATS))}"
        )
    if ext not in NATIVE_FORMATS:
        raise ValueError(
            f"Unsupported audio format: '{ext}'. "
            f"Supported: {', '.join(sorted(NATIVE_FORMATS | FFMPEG_FORMATS))}"
        )

    # Probe header first so unreadable files fail with a clear message.
    try:
        info = sf.info(str(audio_path))
    except RuntimeError as e:
        raise RuntimeError(f"Cannot read audio file: {audio_path} ({e})") from e

    samples, rate = sf.read(str(audio_path), dtype="float64", always_2d=True)

    return samples, AudioMetadata(
        path=str(audio_path),
        duration=len(samples) / rate,
        sample_rate=rate,
        channels=samples.shape[1],
        bit_depth=_subtype_to_bits(info.subtype),
        format_name=info.format,
        frames=len(samples),
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _subtype_to_bits(subtype: str) -> int | None:
|
|
74
|
+
"""Convert soundfile subtype string to bit depth."""
|
|
75
|
+
mapping: dict[str, int] = {
|
|
76
|
+
"PCM_16": 16,
|
|
77
|
+
"PCM_24": 24,
|
|
78
|
+
"PCM_32": 32,
|
|
79
|
+
"PCM_S8": 8,
|
|
80
|
+
"PCM_U8": 8,
|
|
81
|
+
"FLOAT": 32,
|
|
82
|
+
"DOUBLE": 64,
|
|
83
|
+
}
|
|
84
|
+
return mapping.get(subtype)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as M:SS.mmm.

    Rounds to whole milliseconds first and carries any overflow into the
    minutes field. The previous implementation formatted the raw remainder
    with ``:06.3f``, so e.g. 59.9999 s rendered as ``"0:60.000"`` instead
    of ``"1:00.000"``.
    """
    total_ms = round(seconds * 1000)
    minutes, ms_rem = divmod(total_ms, 60_000)
    return f"{minutes}:{ms_rem / 1000:06.3f}"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def format_channels(n: int) -> str:
    """Format channel count as a human-readable string."""
    special = {1: "mono", 2: "stereo"}
    return special.get(n, f"{n}ch")
|
sounddiff/loudness.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Loudness analysis: integrated LUFS, true peak, loudness range."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pyloudnorm as pyln
|
|
7
|
+
|
|
8
|
+
from sounddiff.types import LoudnessComparison, LoudnessResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def measure_loudness(data: np.ndarray, sample_rate: int) -> LoudnessResult:
    """Measure loudness metrics for an audio signal.

    Args:
        data: Audio signal, shape (frames, channels).
        sample_rate: Sample rate in Hz.

    Returns:
        LoudnessResult with LUFS, true peak, and loudness range.
    """
    # pyloudnorm expects (samples, channels), which matches our layout.
    lufs = pyln.Meter(sample_rate).integrated_loudness(data)

    # Simplified true peak: the maximum absolute sample value across all
    # channels in dBTP. Full ITU-R BS.1770 true peak requires 4x
    # oversampling; sample peak is a reasonable approximation for
    # comparison purposes. Silence maps to -inf.
    peak = np.max(np.abs(data))
    dbtp = 20 * np.log10(peak) if peak > 0 else -np.inf

    # Loudness range (LRA): spread between the 95th and 10th percentile of
    # short-term loudness measurements.
    lra = _compute_loudness_range(data, sample_rate)

    return LoudnessResult(
        lufs=round(lufs, 1),
        true_peak_dbtp=round(float(dbtp), 1),
        loudness_range=round(lra, 1),
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def compare_loudness(
    data_a: np.ndarray,
    data_b: np.ndarray,
    sample_rate_a: int,
    sample_rate_b: int,
) -> LoudnessComparison:
    """Compare loudness between two audio signals.

    Args:
        data_a: First audio signal.
        data_b: Second audio signal.
        sample_rate_a: Sample rate of first signal.
        sample_rate_b: Sample rate of second signal.

    Returns:
        LoudnessComparison with measurements for both files.
    """
    # Each file is measured independently at its own sample rate.
    return LoudnessComparison(
        file_a=measure_loudness(data_a, sample_rate_a),
        file_b=measure_loudness(data_b, sample_rate_b),
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _compute_loudness_range(data: np.ndarray, sample_rate: int) -> float:
    """Compute loudness range (LRA) using short-term loudness measurements.

    Uses 3-second windows with 2-second overlap per EBU R128.
    """
    win = int(3.0 * sample_rate)
    hop = int(1.0 * sample_rate)  # 2 s overlap means a 1 s hop
    meter = pyln.Meter(sample_rate)

    # Signals shorter than one window yield no short-term measurements.
    if len(data) < win:
        return 0.0

    # Measure each window; discard -inf / NaN readings from silent windows.
    measured = [
        meter.integrated_loudness(data[off : off + win])
        for off in range(0, len(data) - win + 1, hop)
    ]
    finite = [lv for lv in measured if np.isfinite(lv)]

    if len(finite) < 2:
        return 0.0

    levels = np.array(finite)

    # Absolute gate: drop windows below -70 LUFS.
    levels = levels[levels > -70.0]
    if len(levels) < 2:
        return 0.0

    # LRA is the spread between the 95th and 10th percentiles,
    # clamped to be non-negative.
    spread = float(np.percentile(levels, 95)) - float(np.percentile(levels, 10))
    return max(0.0, spread)
|