visqol-python 3.3.4__tar.gz → 3.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {visqol_python-3.3.4/visqol_python.egg-info → visqol_python-3.3.5}/PKG-INFO +1 -1
- {visqol_python-3.3.4 → visqol_python-3.3.5}/pyproject.toml +6 -1
- {visqol_python-3.3.4 → visqol_python-3.3.5}/tests/test_quick.py +80 -1
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/__init__.py +4 -2
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/__main__.py +44 -26
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/alignment.py +22 -12
- visqol_python-3.3.5/visqol/analysis_window.py +76 -0
- visqol_python-3.3.5/visqol/api.py +181 -0
- visqol_python-3.3.5/visqol/audio_utils.py +128 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/gammatone.py +123 -100
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/nsim.py +45 -44
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/patch_creator.py +95 -63
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/patch_selector.py +101 -72
- visqol_python-3.3.5/visqol/py.typed +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/quality_mapper.py +56 -45
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/signal_utils.py +26 -18
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/visqol_core.py +74 -52
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/visqol_manager.py +94 -64
- {visqol_python-3.3.4 → visqol_python-3.3.5/visqol_python.egg-info}/PKG-INFO +1 -1
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol_python.egg-info/SOURCES.txt +1 -0
- visqol_python-3.3.4/visqol/analysis_window.py +0 -52
- visqol_python-3.3.4/visqol/api.py +0 -110
- visqol_python-3.3.4/visqol/audio_utils.py +0 -90
- {visqol_python-3.3.4 → visqol_python-3.3.5}/LICENSE +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/MANIFEST.in +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/README.md +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/requirements.txt +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/setup.cfg +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/tests/test_conformance.py +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol/model/libsvm_nu_svr_model.txt +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol_python.egg-info/dependency_links.txt +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol_python.egg-info/entry_points.txt +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol_python.egg-info/requires.txt +0 -0
- {visqol_python-3.3.4 → visqol_python-3.3.5}/visqol_python.egg-info/top_level.txt +0 -0
|
@@ -56,7 +56,12 @@ version = {attr = "visqol.__version__"}
|
|
|
56
56
|
exclude = ["tests*"]
|
|
57
57
|
|
|
58
58
|
[tool.setuptools.package-data]
|
|
59
|
-
visqol = ["model/*.txt"]
|
|
59
|
+
visqol = ["model/*.txt", "py.typed"]
|
|
60
60
|
|
|
61
61
|
[tool.pytest.ini_options]
|
|
62
62
|
testpaths = ["tests"]
|
|
63
|
+
|
|
64
|
+
[tool.mypy]
|
|
65
|
+
strict = true
|
|
66
|
+
warn_return_any = true
|
|
67
|
+
warn_unused_configs = true
|
|
@@ -7,7 +7,10 @@ These tests verify basic API functionality without requiring external testdata.
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pytest
|
|
9
9
|
|
|
10
|
-
from visqol import VisqolApi
|
|
10
|
+
from visqol import VisqolApi, SimilarityResult, AudioSignal
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── API creation ──
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class TestApiCreation:
|
|
@@ -26,6 +29,69 @@ class TestApiCreation:
|
|
|
26
29
|
api = VisqolApi()
|
|
27
30
|
api.create()
|
|
28
31
|
|
|
32
|
+
def test_create_case_insensitive(self):
|
|
33
|
+
api = VisqolApi()
|
|
34
|
+
api.create(mode="SPEECH")
|
|
35
|
+
|
|
36
|
+
def test_create_invalid_mode_raises(self):
|
|
37
|
+
api = VisqolApi()
|
|
38
|
+
with pytest.raises(ValueError, match="Invalid mode"):
|
|
39
|
+
api.create(mode="invalid")
|
|
40
|
+
|
|
41
|
+
def test_create_negative_search_window_raises(self):
|
|
42
|
+
api = VisqolApi()
|
|
43
|
+
with pytest.raises(ValueError, match="search_window"):
|
|
44
|
+
api.create(search_window=-1)
|
|
45
|
+
|
|
46
|
+
def test_create_missing_model_raises(self):
|
|
47
|
+
api = VisqolApi()
|
|
48
|
+
with pytest.raises(FileNotFoundError):
|
|
49
|
+
api.create(mode="audio", model_path="/nonexistent/model.txt")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ── Measure guards ──
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TestMeasureGuards:
|
|
56
|
+
"""Test that measure() raises helpful errors for bad inputs."""
|
|
57
|
+
|
|
58
|
+
def test_measure_before_create_raises(self):
|
|
59
|
+
api = VisqolApi()
|
|
60
|
+
with pytest.raises(RuntimeError, match="create"):
|
|
61
|
+
api.measure("a.wav", "b.wav")
|
|
62
|
+
|
|
63
|
+
def test_measure_nonexistent_ref_raises(self):
|
|
64
|
+
api = VisqolApi()
|
|
65
|
+
api.create(mode="speech")
|
|
66
|
+
with pytest.raises(FileNotFoundError, match="Reference"):
|
|
67
|
+
api.measure("/nonexistent/ref.wav", "/nonexistent/deg.wav")
|
|
68
|
+
|
|
69
|
+
def test_measure_from_arrays_before_create_raises(self):
|
|
70
|
+
api = VisqolApi()
|
|
71
|
+
with pytest.raises(RuntimeError, match="create"):
|
|
72
|
+
api.measure_from_arrays(np.zeros(100), np.zeros(100), 16000)
|
|
73
|
+
|
|
74
|
+
def test_measure_from_arrays_bad_type_raises(self):
|
|
75
|
+
api = VisqolApi()
|
|
76
|
+
api.create(mode="speech")
|
|
77
|
+
with pytest.raises(TypeError, match="numpy array"):
|
|
78
|
+
api.measure_from_arrays([1, 2, 3], np.zeros(100), 16000) # type: ignore[arg-type]
|
|
79
|
+
|
|
80
|
+
def test_measure_from_arrays_empty_raises(self):
|
|
81
|
+
api = VisqolApi()
|
|
82
|
+
api.create(mode="speech")
|
|
83
|
+
with pytest.raises(ValueError, match="empty"):
|
|
84
|
+
api.measure_from_arrays(np.array([]), np.zeros(100), 16000)
|
|
85
|
+
|
|
86
|
+
def test_measure_from_arrays_bad_sr_raises(self):
|
|
87
|
+
api = VisqolApi()
|
|
88
|
+
api.create(mode="speech")
|
|
89
|
+
with pytest.raises(ValueError, match="sample_rate"):
|
|
90
|
+
api.measure_from_arrays(np.zeros(100), np.zeros(100), 0)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── measure_from_arrays ──
|
|
94
|
+
|
|
29
95
|
|
|
30
96
|
class TestMeasureFromArrays:
|
|
31
97
|
"""Test measure_from_arrays with synthetic signals."""
|
|
@@ -59,6 +125,9 @@ class TestMeasureFromArrays:
|
|
|
59
125
|
)
|
|
60
126
|
|
|
61
127
|
|
|
128
|
+
# ── Result fields ──
|
|
129
|
+
|
|
130
|
+
|
|
62
131
|
class TestResultFields:
|
|
63
132
|
"""Test that SimilarityResult has all expected fields."""
|
|
64
133
|
|
|
@@ -78,6 +147,9 @@ class TestResultFields:
|
|
|
78
147
|
assert hasattr(result, "patch_sims")
|
|
79
148
|
|
|
80
149
|
|
|
150
|
+
# ── Package metadata ──
|
|
151
|
+
|
|
152
|
+
|
|
81
153
|
class TestVersion:
|
|
82
154
|
"""Test package version is accessible."""
|
|
83
155
|
|
|
@@ -87,3 +159,10 @@ class TestVersion:
|
|
|
87
159
|
assert isinstance(visqol.__version__, str)
|
|
88
160
|
parts = visqol.__version__.split(".")
|
|
89
161
|
assert len(parts) >= 2, "Version should have at least major.minor"
|
|
162
|
+
|
|
163
|
+
def test_public_exports(self):
|
|
164
|
+
"""Package should export key classes."""
|
|
165
|
+
import visqol
|
|
166
|
+
assert hasattr(visqol, "VisqolApi")
|
|
167
|
+
assert hasattr(visqol, "SimilarityResult")
|
|
168
|
+
assert hasattr(visqol, "AudioSignal")
|
|
@@ -13,8 +13,10 @@ Usage:
|
|
|
13
13
|
print(f"MOS-LQO: {result.moslqo}")
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
__version__ = "3.3.4"
|
|
16
|
+
__version__: str = "3.3.5"
|
|
17
17
|
|
|
18
18
|
from visqol.api import VisqolApi
|
|
19
|
+
from visqol.visqol_core import SimilarityResult
|
|
20
|
+
from visqol.audio_utils import AudioSignal
|
|
19
21
|
|
|
20
|
-
__all__ = ["VisqolApi"]
|
|
22
|
+
__all__: list[str] = ["VisqolApi", "SimilarityResult", "AudioSignal"]
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
2
|
ViSQOL command-line interface.
|
|
3
3
|
|
|
4
|
-
Usage
|
|
4
|
+
Usage::
|
|
5
|
+
|
|
5
6
|
python -m visqol --reference ref.wav --degraded deg.wav [--speech_mode]
|
|
6
7
|
"""
|
|
7
8
|
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
8
11
|
import argparse
|
|
9
12
|
import sys
|
|
10
13
|
import logging
|
|
@@ -12,45 +15,46 @@ import logging
|
|
|
12
15
|
from visqol.api import VisqolApi
|
|
13
16
|
|
|
14
17
|
|
|
15
|
-
def main():
|
|
18
|
+
def main() -> None:
|
|
19
|
+
"""Entry point for the ``visqol`` CLI."""
|
|
16
20
|
parser = argparse.ArgumentParser(
|
|
17
|
-
description="ViSQOL - Virtual Speech Quality Objective Listener (Python)"
|
|
21
|
+
description="ViSQOL - Virtual Speech Quality Objective Listener (Python)",
|
|
18
22
|
)
|
|
19
23
|
parser.add_argument(
|
|
20
24
|
"--reference", "-r", required=True,
|
|
21
|
-
help="Path to reference audio file (WAV)"
|
|
25
|
+
help="Path to reference audio file (WAV)",
|
|
22
26
|
)
|
|
23
27
|
parser.add_argument(
|
|
24
28
|
"--degraded", "-d", required=True,
|
|
25
|
-
help="Path to degraded audio file (WAV)"
|
|
29
|
+
help="Path to degraded audio file (WAV)",
|
|
26
30
|
)
|
|
27
31
|
parser.add_argument(
|
|
28
32
|
"--speech_mode", action="store_true",
|
|
29
|
-
help="Use speech mode (
|
|
33
|
+
help="Use speech mode (16 kHz, exponential mapping)",
|
|
30
34
|
)
|
|
31
35
|
parser.add_argument(
|
|
32
36
|
"--model", default=None,
|
|
33
|
-
help="Path to SVR model file (Audio mode only)"
|
|
37
|
+
help="Path to SVR model file (Audio mode only)",
|
|
34
38
|
)
|
|
35
39
|
parser.add_argument(
|
|
36
40
|
"--search_window", type=int, default=60,
|
|
37
|
-
help="Search window radius (default: 60)"
|
|
41
|
+
help="Search window radius (default: 60)",
|
|
38
42
|
)
|
|
39
43
|
parser.add_argument(
|
|
40
44
|
"--unscaled_speech", action="store_true",
|
|
41
|
-
help="Don't scale speech MOS to max 5.0"
|
|
45
|
+
help="Don't scale speech MOS to max 5.0",
|
|
42
46
|
)
|
|
43
47
|
parser.add_argument(
|
|
44
48
|
"--no_alignment", action="store_true",
|
|
45
|
-
help="Disable global alignment"
|
|
49
|
+
help="Disable global alignment",
|
|
46
50
|
)
|
|
47
51
|
parser.add_argument(
|
|
48
52
|
"--no_realignment", action="store_true",
|
|
49
|
-
help="Disable fine realignment"
|
|
53
|
+
help="Disable fine realignment",
|
|
50
54
|
)
|
|
51
55
|
parser.add_argument(
|
|
52
56
|
"--verbose", "-v", action="store_true",
|
|
53
|
-
help="Enable verbose output"
|
|
57
|
+
help="Enable verbose output",
|
|
54
58
|
)
|
|
55
59
|
|
|
56
60
|
args = parser.parse_args()
|
|
@@ -60,18 +64,30 @@ def main():
|
|
|
60
64
|
logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
|
|
61
65
|
|
|
62
66
|
# Run ViSQOL
|
|
63
|
-
mode = "speech" if args.speech_mode else "audio"
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
mode: str = "speech" if args.speech_mode else "audio"
|
|
68
|
+
|
|
69
|
+
try:
|
|
70
|
+
api = VisqolApi()
|
|
71
|
+
api.create(
|
|
72
|
+
mode=mode,
|
|
73
|
+
model_path=args.model,
|
|
74
|
+
search_window=args.search_window,
|
|
75
|
+
use_unscaled_speech=args.unscaled_speech,
|
|
76
|
+
disable_global_alignment=args.no_alignment,
|
|
77
|
+
disable_realignment=args.no_realignment,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
result = api.measure(args.reference, args.degraded)
|
|
73
81
|
|
|
74
|
-
|
|
82
|
+
except FileNotFoundError as exc:
|
|
83
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
84
|
+
sys.exit(1)
|
|
85
|
+
except ValueError as exc:
|
|
86
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
87
|
+
sys.exit(1)
|
|
88
|
+
except RuntimeError as exc:
|
|
89
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
90
|
+
sys.exit(1)
|
|
75
91
|
|
|
76
92
|
# Output results
|
|
77
93
|
print(f"MOS-LQO: {result.moslqo:.6f}")
|
|
@@ -83,9 +99,11 @@ def main():
|
|
|
83
99
|
print(f"FVDEGENERGY: {result.fvdegenergy}")
|
|
84
100
|
print(f"Patches: {len(result.patch_sims)}")
|
|
85
101
|
for i, p in enumerate(result.patch_sims):
|
|
86
|
-
print(
|
|
87
|
-
|
|
88
|
-
|
|
102
|
+
print(
|
|
103
|
+
f" Patch {i}: sim={p.similarity:.4f} "
|
|
104
|
+
f"ref=[{p.ref_patch_start_time:.3f}-{p.ref_patch_end_time:.3f}] "
|
|
105
|
+
f"deg=[{p.deg_patch_start_time:.3f}-{p.deg_patch_end_time:.3f}]"
|
|
106
|
+
)
|
|
89
107
|
|
|
90
108
|
|
|
91
109
|
if __name__ == "__main__":
|
|
@@ -4,31 +4,38 @@ Global signal alignment using upper envelope cross-correlation.
|
|
|
4
4
|
Corresponds to C++ file: alignment.cc
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Tuple
|
|
10
|
+
|
|
7
11
|
import numpy as np
|
|
12
|
+
|
|
8
13
|
from visqol.audio_utils import AudioSignal
|
|
9
14
|
from visqol.signal_utils import upper_envelope, find_best_lag
|
|
10
15
|
|
|
11
16
|
|
|
12
|
-
def globally_align(reference: AudioSignal, degraded: AudioSignal) -> tuple:
|
|
17
|
+
def globally_align(
|
|
18
|
+
reference: AudioSignal, degraded: AudioSignal
|
|
19
|
+
) -> Tuple[AudioSignal, float]:
|
|
13
20
|
"""
|
|
14
21
|
Globally align degraded signal to reference signal.
|
|
15
22
|
|
|
16
23
|
Uses upper envelope cross-correlation to find the best time-domain lag,
|
|
17
24
|
then shifts the degraded signal accordingly.
|
|
18
25
|
|
|
19
|
-
Matches C++ Alignment::GloballyAlign
|
|
26
|
+
Matches C++ ``Alignment::GloballyAlign``.
|
|
20
27
|
|
|
21
28
|
Args:
|
|
22
29
|
reference: Reference audio signal.
|
|
23
30
|
degraded: Degraded audio signal.
|
|
24
31
|
|
|
25
32
|
Returns:
|
|
26
|
-
Tuple of (aligned_degraded
|
|
33
|
+
Tuple of ``(aligned_degraded, lag_seconds)``.
|
|
27
34
|
"""
|
|
28
35
|
ref_env = upper_envelope(reference.data)
|
|
29
36
|
deg_env = upper_envelope(degraded.data)
|
|
30
37
|
|
|
31
|
-
best_lag = find_best_lag(ref_env, deg_env)
|
|
38
|
+
best_lag: int = find_best_lag(ref_env, deg_env)
|
|
32
39
|
|
|
33
40
|
# Limit lag to half the reference length
|
|
34
41
|
if best_lag == 0 or abs(best_lag) > len(reference.data) / 2.0:
|
|
@@ -42,19 +49,20 @@ def globally_align(reference: AudioSignal, degraded: AudioSignal) -> tuple:
|
|
|
42
49
|
new_data = np.concatenate([np.zeros(best_lag), degraded.data])
|
|
43
50
|
|
|
44
51
|
aligned_signal = AudioSignal(new_data, degraded.sample_rate)
|
|
45
|
-
lag_seconds = best_lag / float(degraded.sample_rate)
|
|
52
|
+
lag_seconds: float = best_lag / float(degraded.sample_rate)
|
|
46
53
|
return aligned_signal, lag_seconds
|
|
47
54
|
|
|
48
55
|
|
|
49
|
-
def align_and_truncate(reference: AudioSignal,
|
|
50
|
-
|
|
56
|
+
def align_and_truncate(
|
|
57
|
+
reference: AudioSignal, degraded: AudioSignal
|
|
58
|
+
) -> Tuple[AudioSignal, AudioSignal, float]:
|
|
51
59
|
"""
|
|
52
60
|
Align and truncate signals to the same length.
|
|
53
61
|
|
|
54
|
-
Matches C++ Alignment::AlignAndTruncate
|
|
62
|
+
Matches C++ ``Alignment::AlignAndTruncate``.
|
|
55
63
|
|
|
56
64
|
Returns:
|
|
57
|
-
Tuple of (aligned_ref
|
|
65
|
+
Tuple of ``(aligned_ref, aligned_deg, lag_seconds)``.
|
|
58
66
|
"""
|
|
59
67
|
aligned_deg, lag_seconds = globally_align(reference, degraded)
|
|
60
68
|
|
|
@@ -77,6 +85,8 @@ def align_and_truncate(reference: AudioSignal,
|
|
|
77
85
|
ref_data = ref_data[:min_len]
|
|
78
86
|
deg_data = deg_data[:min_len]
|
|
79
87
|
|
|
80
|
-
return (
|
|
81
|
-
|
|
82
|
-
|
|
88
|
+
return (
|
|
89
|
+
AudioSignal(ref_data, reference.sample_rate),
|
|
90
|
+
AudioSignal(deg_data, degraded.sample_rate),
|
|
91
|
+
lag_seconds,
|
|
92
|
+
)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Analysis window for spectrogram construction.
|
|
3
|
+
|
|
4
|
+
Corresponds to C++ files: analysis_window.cc/h
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
from numpy.typing import NDArray
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AnalysisWindow:
    """
    Analysis window used for spectrogram frame windowing.

    Attributes:
        size: Window size in samples (``round(sample_rate * window_duration)``).
        overlap: Hop expressed as a fraction of the window size
            (e.g. 0.25 means the hop is 25 % of the window).
        window_duration: Duration of window in seconds.
    """

    def __init__(
        self,
        sample_rate: int,
        overlap: float = 0.25,
        window_duration: float = 0.08,
    ) -> None:
        """
        Args:
            sample_rate: Sample rate of the audio signal.
            overlap: Overlap as a fraction of window size
                (used as ``hop = size * overlap``).
            window_duration: Duration of the analysis window in seconds.

        Raises:
            ValueError: If *sample_rate* ≤ 0, *overlap* not in (0, 1),
                or *window_duration* ≤ 0.
        """
        if sample_rate <= 0:
            raise ValueError(f"sample_rate must be positive, got {sample_rate}")
        if not (0.0 < overlap < 1.0):
            raise ValueError(f"overlap must be in (0, 1), got {overlap}")
        # A zero or negative duration would yield a zero/negative ``size``,
        # which no frame can ever match; reject it like the other arguments.
        # Written as ``not (> 0)`` so that NaN is rejected as well.
        if not (window_duration > 0.0):
            raise ValueError(
                f"window_duration must be positive, got {window_duration}"
            )

        self.window_duration: float = window_duration
        self.overlap: float = overlap
        self.size: int = int(round(sample_rate * window_duration))
        # Built lazily on first access to ``hann_window`` and then reused.
        self._hann_window: NDArray[np.float64] | None = None

    @property
    def hop_size(self) -> int:
        """Hop size = window_size × overlap (truncated towards zero)."""
        return int(self.size * self.overlap)

    @property
    def hann_window(self) -> NDArray[np.float64]:
        """Hann window of length ``size``, computed once and cached."""
        window = self._hann_window
        if window is None:
            n = self.size
            if n == 1:
                # The closed form below divides by (n - 1); for a single
                # sample that is 0/0 and would produce NaN.  Use the usual
                # convention for a length-1 window (all ones) instead.
                window = np.ones(1, dtype=np.float64)
            else:
                # Match C++ exactly: 0.5 − 0.5 * cos(2π i / (size − 1))
                window = 0.5 - 0.5 * np.cos(
                    2.0 * np.pi * np.arange(n) / (n - 1)
                )
            self._hann_window = window
        return window

    def apply_hann_window(self, frame: NDArray[np.float64]) -> NDArray[np.float64]:
        """Apply Hann window to a frame.

        Args:
            frame: Samples to window; its length must equal ``size``.

        Returns:
            The element-wise product of *frame* and the Hann window.

        Raises:
            ValueError: If *frame* length does not match window size.
        """
        if len(frame) != self.size:
            raise ValueError(
                f"Frame length ({len(frame)}) does not match "
                f"window size ({self.size})."
            )
        return frame * self.hann_window
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ViSQOL public API.
|
|
3
|
+
|
|
4
|
+
Provides a simple interface for comparing audio quality.
|
|
5
|
+
|
|
6
|
+
Corresponds to C++ file: visqol_api.cc
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
from numpy.typing import NDArray
|
|
16
|
+
|
|
17
|
+
from visqol.audio_utils import AudioSignal
|
|
18
|
+
from visqol.visqol_manager import VisqolManager
|
|
19
|
+
from visqol.visqol_core import SimilarityResult
|
|
20
|
+
|
|
21
|
+
# Valid mode names
_VALID_MODES = frozenset({"audio", "speech"})

# Default SVR model path (bundled inside the package)
_DEFAULT_MODEL_DIR: str = os.path.join(os.path.dirname(__file__), "model")
_DEFAULT_SVR_MODEL: str = os.path.join(_DEFAULT_MODEL_DIR, "libsvm_nu_svr_model.txt")


class VisqolApi:
    """
    Public API for ViSQOL audio quality assessment.

    Usage::

        api = VisqolApi()
        api.create(mode="audio")
        result = api.measure("reference.wav", "degraded.wav")
        print(f"MOS-LQO: {result.moslqo}")
    """

    def __init__(self) -> None:
        self._manager: VisqolManager = VisqolManager()
        # Flipped to True only after ``create()`` finishes successfully.
        self._is_created: bool = False

    def create(
        self,
        mode: str = "audio",
        model_path: Optional[str] = None,
        search_window: int = 60,
        use_unscaled_speech: bool = False,
        disable_global_alignment: bool = False,
        disable_realignment: bool = False,
    ) -> None:
        """
        Initialize ViSQOL with the specified configuration.

        Args:
            mode: ``"audio"`` for music/general audio (48 kHz, SVR model) or
                ``"speech"`` for speech signals (16 kHz, exponential fit).
                Matched case-insensitively.
            model_path: Path to SVR model file (Audio mode only).
                If *None*, uses the bundled default model.
            search_window: Search window radius (default 60).
            use_unscaled_speech: If *True*, don't scale speech MOS to 5.0.
            disable_global_alignment: Skip global alignment step.
            disable_realignment: Skip fine realignment step.

        Raises:
            ValueError: If *mode* is not ``"audio"`` or ``"speech"``.
            ValueError: If *search_window* is not positive.
            FileNotFoundError: If the model file to use does not exist.
        """
        mode_lower = mode.lower()
        if mode_lower not in _VALID_MODES:
            raise ValueError(
                f"Invalid mode {mode!r}. Must be one of {sorted(_VALID_MODES)}."
            )

        if search_window <= 0:
            raise ValueError(
                f"search_window must be a positive integer, got {search_window}."
            )

        use_speech_mode = mode_lower == "speech"

        # Only audio mode falls back to the bundled model; in speech mode a
        # missing ``model_path`` is forwarded to the manager as "".
        if not use_speech_mode and model_path is None:
            model_path = _DEFAULT_SVR_MODEL

        if model_path is not None and not os.path.isfile(model_path):
            raise FileNotFoundError(
                f"SVR model file not found: {model_path}"
            )

        # Argument validation above leaves any earlier configuration intact.
        # From here on the manager is being reconfigured, so drop the "ready"
        # flag first: if init() raises, measure*() must not run against a
        # manager whose state is unknown.
        self._is_created = False
        self._manager.init(
            model_path=model_path or "",
            use_speech_mode=use_speech_mode,
            use_unscaled_speech=use_unscaled_speech,
            search_window=search_window,
            disable_global_alignment=disable_global_alignment,
            disable_realignment=disable_realignment,
        )
        self._is_created = True

    def measure(self, ref_path: str, deg_path: str) -> SimilarityResult:
        """
        Compare two audio files and return quality assessment.

        Args:
            ref_path: Path to reference audio file (WAV).
            deg_path: Path to degraded audio file (WAV).

        Returns:
            :class:`SimilarityResult` containing MOS-LQO score and detailed results.

        Raises:
            RuntimeError: If :meth:`create` has not been called.
            FileNotFoundError: If either audio file does not exist
                (the reference path is checked first).
        """
        self._ensure_created()

        if not os.path.isfile(ref_path):
            raise FileNotFoundError(f"Reference audio file not found: {ref_path}")
        if not os.path.isfile(deg_path):
            raise FileNotFoundError(f"Degraded audio file not found: {deg_path}")

        return self._manager.run(ref_path, deg_path)

    def measure_from_arrays(
        self,
        ref_array: NDArray[np.floating],
        deg_array: NDArray[np.floating],
        sample_rate: int,
    ) -> SimilarityResult:
        """
        Compare two audio signals from numpy arrays.

        Args:
            ref_array: Reference audio signal (numpy array; dimensionality
                is not checked here).
            deg_array: Degraded audio signal (numpy array; dimensionality
                is not checked here).
            sample_rate: Sample rate of both signals in Hz.

        Returns:
            :class:`SimilarityResult` containing MOS-LQO score and detailed results.

        Raises:
            RuntimeError: If :meth:`create` has not been called.
            ValueError: If arrays are empty or *sample_rate* is not positive.
            TypeError: If arrays are not numpy arrays.
        """
        self._ensure_created()
        self._validate_arrays(ref_array, deg_array)

        if sample_rate <= 0:
            raise ValueError(
                f"sample_rate must be a positive integer, got {sample_rate}."
            )

        ref_signal = AudioSignal(ref_array, sample_rate)
        deg_signal = AudioSignal(deg_array, sample_rate)
        return self._manager.run_from_signals(ref_signal, deg_signal)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _ensure_created(self) -> None:
        """Raise if :meth:`create` has not been called."""
        if not self._is_created:
            raise RuntimeError(
                "VisqolApi must be created (call .create()) before measuring."
            )

    @staticmethod
    def _validate_arrays(ref_array: object, deg_array: object) -> None:
        """Check that both inputs are non-empty numpy arrays.

        Both types are checked before either size so that a non-array
        argument is always reported as ``TypeError``, whichever position
        it is in.
        """
        named = (("ref_array", ref_array), ("deg_array", deg_array))
        for label, value in named:
            if not isinstance(value, np.ndarray):
                raise TypeError(
                    f"{label} must be a numpy array, got {type(value).__name__}"
                )
        for label, value in named:
            if value.size == 0:
                raise ValueError(f"{label} must not be empty.")
|