sounddiff 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sounddiff/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """sounddiff: structured audio comparison for producers and developers."""
2
+
3
+ __version__ = "0.1.0"
sounddiff/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running sounddiff as `python -m sounddiff`."""
2
+
3
+ from sounddiff.cli import main
4
+
5
+ main()
sounddiff/cli.py ADDED
@@ -0,0 +1,79 @@
1
+ """CLI entry point for sounddiff."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ import click
8
+
9
+ from sounddiff import __version__
10
+ from sounddiff.core import diff
11
+ from sounddiff.report import render
12
+ from sounddiff.types import OutputFormat
13
+
14
+
15
@click.command()
@click.argument("file_a", type=click.Path(exists=True))
@click.argument("file_b", type=click.Path(exists=True))
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["terminal", "json", "html"]),
    default="terminal",
    help="Output format.",
)
@click.option(
    "-o",
    "--output",
    "output_path",
    type=click.Path(),
    default=None,
    help="Write output to a file (useful with --format html).",
)
@click.option(
    "--verbose",
    is_flag=True,
    default=False,
    help="Show additional detail.",
)
@click.option(
    "--no-color",
    is_flag=True,
    default=False,
    help="Disable colored terminal output.",
)
@click.version_option(version=__version__, prog_name="sounddiff")
def main(
    file_a: str,
    file_b: str,
    output_format: str,
    output_path: str | None,
    verbose: bool,  # NOTE(review): accepted but currently unused — confirm intended
    no_color: bool,
) -> None:
    """Compare two audio files and report what changed.

    sounddiff FILE_A FILE_B

    Compares FILE_A (reference) against FILE_B (comparison) and reports
    differences in loudness, spectral content, timing, and potential issues.
    """
    try:
        result = diff(file_a, file_b)
    # diff() documents FileNotFoundError / ValueError / RuntimeError; all
    # three were handled identically in triplicate, so collapse into one
    # handler: print the message to stderr and exit non-zero.
    except (FileNotFoundError, ValueError, RuntimeError) as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)

    fmt = OutputFormat(output_format)
    output = render(result, fmt, output_path, no_color=no_color)

    # HTML written to a file gets a confirmation line; everything else is
    # printed directly.
    if output_path and fmt == OutputFormat.HTML:
        click.echo(f"Report written to {output_path}")
    else:
        click.echo(output)
sounddiff/core.py ADDED
@@ -0,0 +1,66 @@
1
+ """Core orchestration: run the full comparison pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from sounddiff.detection import compare_detection
8
+ from sounddiff.formats import load_audio
9
+ from sounddiff.loudness import compare_loudness
10
+ from sounddiff.spectral import compare_spectral
11
+ from sounddiff.temporal import compare_temporal
12
+ from sounddiff.types import DiffResult, MetadataComparison
13
+
14
+
15
def diff(
    path_a: str | Path,
    path_b: str | Path,
) -> DiffResult:
    """Compare two audio files and return a structured diff.

    Args:
        path_a: Path to the first (reference) audio file.
        path_b: Path to the second (comparison) audio file.

    Returns:
        DiffResult containing all analysis results.

    Raises:
        FileNotFoundError: If either file does not exist.
        ValueError: If either file format is unsupported.
    """
    signal_a, meta_a = load_audio(path_a)
    signal_b, meta_b = load_audio(path_b)

    metadata = MetadataComparison(file_a=meta_a, file_b=meta_b)

    # Surface comparability problems up front; analyses still run, but the
    # caller is told their accuracy may be degraded.
    warnings: list[str] = []
    if not metadata.same_sample_rate:
        warnings.append(
            f"Sample rate mismatch: {meta_a.sample_rate} Hz vs {meta_b.sample_rate} Hz. "
            "Comparison accuracy may be reduced."
        )
    if not metadata.same_channels:
        warnings.append(
            f"Channel count mismatch: {meta_a.channels} vs {meta_b.channels}. "
            "Comparison will use mixed-to-mono signals where needed."
        )

    # Detection output labels events with the bare file names.
    name_a = Path(meta_a.path).name
    name_b = Path(meta_b.path).name

    return DiffResult(
        metadata=metadata,
        loudness=compare_loudness(
            signal_a, signal_b, meta_a.sample_rate, meta_b.sample_rate
        ),
        spectral=compare_spectral(
            signal_a, signal_b, meta_a.sample_rate, meta_b.sample_rate
        ),
        temporal=compare_temporal(signal_a, signal_b, meta_a.sample_rate),
        detection=compare_detection(
            signal_a, signal_b, meta_a.sample_rate, meta_b.sample_rate, name_a, name_b
        ),
        warnings=warnings,
    )
sounddiff/detection.py ADDED
@@ -0,0 +1,164 @@
1
+ """Detection: clipping and silence analysis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+
7
+ from sounddiff.types import ClipEvent, DetectionResult, SilenceRegion
8
+
9
+
10
+ def detect_clipping(
11
+ data: np.ndarray,
12
+ sample_rate: int,
13
+ file_label: str,
14
+ threshold: float = 0.99,
15
+ min_consecutive: int = 2,
16
+ ) -> list[ClipEvent]:
17
+ """Detect clipping events in an audio signal.
18
+
19
+ Clipping is defined as consecutive samples at or above the threshold.
20
+
21
+ Args:
22
+ data: Audio signal, shape (frames, channels).
23
+ sample_rate: Sample rate in Hz.
24
+ file_label: Label for this file (used in output).
25
+ threshold: Amplitude threshold for clipping detection.
26
+ min_consecutive: Minimum consecutive samples to count as clipping.
27
+
28
+ Returns:
29
+ List of ClipEvent objects.
30
+ """
31
+ clips: list[ClipEvent] = []
32
+
33
+ for channel in range(data.shape[1]):
34
+ channel_data = np.abs(data[:, channel])
35
+ is_clipping = channel_data >= threshold
36
+
37
+ # Find runs of consecutive clipping samples
38
+ changes = np.diff(is_clipping.astype(int))
39
+ starts = np.where(changes == 1)[0] + 1
40
+ ends = np.where(changes == -1)[0] + 1
41
+
42
+ # Handle edge cases
43
+ if is_clipping[0]:
44
+ starts = np.concatenate([[0], starts])
45
+ if is_clipping[-1]:
46
+ ends = np.concatenate([ends, [len(channel_data)]])
47
+
48
+ for start, end in zip(starts, ends, strict=True):
49
+ count = end - start
50
+ if count >= min_consecutive:
51
+ timestamp = start / sample_rate
52
+ clips.append(
53
+ ClipEvent(
54
+ file_label=file_label,
55
+ timestamp=round(timestamp, 3),
56
+ channel=channel,
57
+ sample_count=int(count),
58
+ )
59
+ )
60
+
61
+ return clips
62
+
63
+
64
+ def detect_silence(
65
+ data: np.ndarray,
66
+ sample_rate: int,
67
+ file_label: str,
68
+ threshold_db: float = -60.0,
69
+ min_duration: float = 0.1,
70
+ ) -> list[SilenceRegion]:
71
+ """Detect regions of silence in an audio signal.
72
+
73
+ Silence is defined as RMS energy below the threshold for at least min_duration.
74
+
75
+ Args:
76
+ data: Audio signal, shape (frames, channels).
77
+ sample_rate: Sample rate in Hz.
78
+ file_label: Label for this file (used in output).
79
+ threshold_db: RMS threshold in dB for silence.
80
+ min_duration: Minimum duration in seconds for a silence region.
81
+
82
+ Returns:
83
+ List of SilenceRegion objects.
84
+ """
85
+ # Mix to mono for silence detection
86
+ mono = np.mean(data, axis=1)
87
+
88
+ # Compute RMS in short windows
89
+ window_size = int(0.01 * sample_rate) # 10ms windows
90
+ if window_size == 0:
91
+ return []
92
+
93
+ threshold_linear = 10 ** (threshold_db / 20.0)
94
+ min_samples = int(min_duration * sample_rate)
95
+
96
+ # Compute per-window RMS
97
+ n_windows = len(mono) // window_size
98
+ if n_windows == 0:
99
+ return []
100
+
101
+ truncated = mono[: n_windows * window_size].reshape(n_windows, window_size)
102
+ rms_values = np.sqrt(np.mean(truncated**2, axis=1))
103
+ is_silent = rms_values < threshold_linear
104
+
105
+ # Find runs of silence
106
+ regions: list[SilenceRegion] = []
107
+ changes = np.diff(is_silent.astype(int))
108
+ starts = np.where(changes == 1)[0] + 1
109
+ ends = np.where(changes == -1)[0] + 1
110
+
111
+ if is_silent[0]:
112
+ starts = np.concatenate([[0], starts])
113
+ if is_silent[-1]:
114
+ ends = np.concatenate([ends, [n_windows]])
115
+
116
+ for start, end in zip(starts, ends, strict=True):
117
+ sample_start = start * window_size
118
+ sample_end = end * window_size
119
+ duration_samples = sample_end - sample_start
120
+
121
+ if duration_samples >= min_samples:
122
+ regions.append(
123
+ SilenceRegion(
124
+ file_label=file_label,
125
+ start_time=round(sample_start / sample_rate, 3),
126
+ end_time=round(sample_end / sample_rate, 3),
127
+ )
128
+ )
129
+
130
+ return regions
131
+
132
+
133
def compare_detection(
    data_a: np.ndarray,
    data_b: np.ndarray,
    sample_rate_a: int,
    sample_rate_b: int,
    file_a_name: str,
    file_b_name: str,
) -> DetectionResult:
    """Run clipping and silence detection on both files.

    Args:
        data_a: First audio signal.
        data_b: Second audio signal.
        sample_rate_a: Sample rate of first signal.
        sample_rate_b: Sample rate of second signal.
        file_a_name: Display name for first file.
        file_b_name: Display name for second file.

    Returns:
        DetectionResult with clips and silence regions.
    """
    # Clip events for both files are merged into one list (each event carries
    # its file label); silence regions stay per-file.
    return DetectionResult(
        clips=(
            detect_clipping(data_a, sample_rate_a, file_a_name)
            + detect_clipping(data_b, sample_rate_b, file_b_name)
        ),
        silence_regions_a=detect_silence(data_a, sample_rate_a, file_a_name),
        silence_regions_b=detect_silence(data_b, sample_rate_b, file_b_name),
    )
sounddiff/formats.py ADDED
@@ -0,0 +1,100 @@
1
+ """Audio file loading and format detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import numpy as np # noqa: TC002 (used at runtime in return type)
8
+ import soundfile as sf
9
+
10
+ from sounddiff.types import AudioMetadata
11
+
12
+ # Formats supported natively via libsndfile
13
+ NATIVE_FORMATS = {".wav", ".flac", ".ogg", ".aiff", ".aif"}
14
+
15
+ # Formats that require ffmpeg
16
+ FFMPEG_FORMATS = {".mp3", ".aac", ".m4a", ".wma", ".opus"}
17
+
18
+
19
def load_audio(path: str | Path) -> tuple[np.ndarray, AudioMetadata]:
    """Load an audio file and return the signal and metadata.

    Args:
        path: Path to the audio file.

    Returns:
        Tuple of (audio signal as float64 ndarray, metadata).
        Signal is always 2D: (frames, channels). Mono files get shape (frames, 1).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the format is unsupported or requires ffmpeg.
        RuntimeError: If the file cannot be read.
    """
    filepath = Path(path)

    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    suffix = filepath.suffix.lower()

    if suffix in FFMPEG_FORMATS:
        raise ValueError(
            f"Format '{suffix}' requires ffmpeg, which is not installed or not supported yet. "
            f"Supported formats without ffmpeg: {', '.join(sorted(NATIVE_FORMATS))}"
        )

    if suffix not in NATIVE_FORMATS:
        raise ValueError(
            f"Unsupported audio format: '{suffix}'. "
            f"Supported: {', '.join(sorted(NATIVE_FORMATS | FFMPEG_FORMATS))}"
        )

    # Fix: sf.read can raise RuntimeError just like sf.info (e.g. a file that
    # is truncated after the header); previously only sf.info was wrapped, so
    # a read failure escaped as a raw libsndfile error instead of the
    # documented "Cannot read audio file" message.
    try:
        info = sf.info(str(filepath))
        data, sample_rate = sf.read(str(filepath), dtype="float64", always_2d=True)
    except RuntimeError as e:
        raise RuntimeError(f"Cannot read audio file: {filepath} ({e})") from e

    metadata = AudioMetadata(
        path=str(filepath),
        duration=len(data) / sample_rate,
        sample_rate=sample_rate,
        channels=data.shape[1],
        # bit_depth is None for compressed/unknown subtypes.
        bit_depth=_subtype_to_bits(info.subtype),
        format_name=info.format,
        frames=len(data),
    )

    return data, metadata
71
+
72
+
73
+ def _subtype_to_bits(subtype: str) -> int | None:
74
+ """Convert soundfile subtype string to bit depth."""
75
+ mapping: dict[str, int] = {
76
+ "PCM_16": 16,
77
+ "PCM_24": 24,
78
+ "PCM_32": 32,
79
+ "PCM_S8": 8,
80
+ "PCM_U8": 8,
81
+ "FLOAT": 32,
82
+ "DOUBLE": 64,
83
+ }
84
+ return mapping.get(subtype)
85
+
86
+
87
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as M:SS.mmm (minutes unpadded)."""
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes)}:{remainder:06.3f}"
92
+
93
+
94
def format_channels(n: int) -> str:
    """Format channel count as a human-readable string."""
    # Named layouts for the common cases; fall back to a numeric suffix.
    return {1: "mono", 2: "stereo"}.get(n, f"{n}ch")
sounddiff/loudness.py ADDED
@@ -0,0 +1,98 @@
1
+ """Loudness analysis: integrated LUFS, true peak, loudness range."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ import pyloudnorm as pyln
7
+
8
+ from sounddiff.types import LoudnessComparison, LoudnessResult
9
+
10
+
11
def measure_loudness(data: np.ndarray, sample_rate: int) -> LoudnessResult:
    """Measure loudness metrics for an audio signal.

    Args:
        data: Audio signal, shape (frames, channels).
        sample_rate: Sample rate in Hz.

    Returns:
        LoudnessResult with LUFS, true peak, and loudness range.
    """
    meter = pyln.Meter(sample_rate)

    # Integrated loudness; pyloudnorm expects (samples, channels), which is
    # exactly our layout.
    lufs = meter.integrated_loudness(data)

    # "True peak" here is the sample peak in dB. A spec-accurate ITU-R
    # BS.1770 dBTP measurement needs 4x oversampling; sample peak is a
    # reasonable approximation for comparison purposes. Silence maps to -inf.
    peak_linear = np.max(np.abs(data))
    if peak_linear > 0:
        true_peak_dbtp = 20 * np.log10(peak_linear)
    else:
        true_peak_dbtp = -np.inf

    # Loudness range (LRA): spread of short-term loudness measurements.
    lra = _compute_loudness_range(data, sample_rate)

    return LoudnessResult(
        lufs=round(lufs, 1),
        true_peak_dbtp=round(float(true_peak_dbtp), 1),
        loudness_range=round(lra, 1),
    )
42
+
43
+
44
def compare_loudness(
    data_a: np.ndarray,
    data_b: np.ndarray,
    sample_rate_a: int,
    sample_rate_b: int,
) -> LoudnessComparison:
    """Compare loudness between two audio signals.

    Args:
        data_a: First audio signal.
        data_b: Second audio signal.
        sample_rate_a: Sample rate of first signal.
        sample_rate_b: Sample rate of second signal.

    Returns:
        LoudnessComparison with measurements for both files.
    """
    # Measure each file independently; the comparison object holds both.
    return LoudnessComparison(
        file_a=measure_loudness(data_a, sample_rate_a),
        file_b=measure_loudness(data_b, sample_rate_b),
    )
64
+
65
+
66
def _compute_loudness_range(data: np.ndarray, sample_rate: int) -> float:
    """Compute loudness range (LRA) using short-term loudness measurements.

    Uses 3-second windows with 2-second overlap per EBU R128.
    """
    window = int(3.0 * sample_rate)
    hop = int(1.0 * sample_rate)  # 2 s overlap between consecutive 3 s windows
    meter = pyln.Meter(sample_rate)

    # Too short for even one window: no meaningful range.
    if len(data) < window:
        return 0.0

    # Measure each window; drop non-finite readings (e.g. all-silent windows).
    measurements = [
        meter.integrated_loudness(data[offset : offset + window])
        for offset in range(0, len(data) - window + 1, hop)
    ]
    levels = np.array([m for m in measurements if np.isfinite(m)])

    if len(levels) < 2:
        return 0.0

    # Absolute gate: discard windows below -70 LUFS.
    levels = levels[levels > -70.0]
    if len(levels) < 2:
        return 0.0

    # LRA is the spread between the 95th and 10th percentile levels,
    # clamped at zero.
    spread = float(np.percentile(levels, 95)) - float(np.percentile(levels, 10))
    return max(0.0, spread)