visqol-python 3.3.3__tar.gz → 3.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {visqol_python-3.3.3/visqol_python.egg-info → visqol_python-3.3.5}/PKG-INFO +26 -18
- {visqol_python-3.3.3 → visqol_python-3.3.5}/README.md +21 -12
- {visqol_python-3.3.3 → visqol_python-3.3.5}/pyproject.toml +18 -4
- visqol_python-3.3.5/tests/test_conformance.py +125 -0
- visqol_python-3.3.5/tests/test_quick.py +168 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/__init__.py +4 -2
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/__main__.py +44 -26
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/alignment.py +22 -12
- visqol_python-3.3.5/visqol/analysis_window.py +76 -0
- visqol_python-3.3.5/visqol/api.py +181 -0
- visqol_python-3.3.5/visqol/audio_utils.py +128 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/gammatone.py +123 -100
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/nsim.py +45 -44
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/patch_creator.py +95 -63
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/patch_selector.py +101 -72
- visqol_python-3.3.5/visqol/py.typed +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/quality_mapper.py +56 -45
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/signal_utils.py +26 -18
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/visqol_core.py +74 -52
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/visqol_manager.py +94 -64
- {visqol_python-3.3.3 → visqol_python-3.3.5/visqol_python.egg-info}/PKG-INFO +26 -18
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol_python.egg-info/SOURCES.txt +1 -1
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol_python.egg-info/requires.txt +3 -0
- visqol_python-3.3.3/setup.py +0 -58
- visqol_python-3.3.3/tests/test_conformance.py +0 -173
- visqol_python-3.3.3/tests/test_quick.py +0 -81
- visqol_python-3.3.3/visqol/analysis_window.py +0 -52
- visqol_python-3.3.3/visqol/api.py +0 -110
- visqol_python-3.3.3/visqol/audio_utils.py +0 -90
- {visqol_python-3.3.3 → visqol_python-3.3.5}/LICENSE +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/MANIFEST.in +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/requirements.txt +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/setup.cfg +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol/model/libsvm_nu_svr_model.txt +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol_python.egg-info/dependency_links.txt +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol_python.egg-info/entry_points.txt +0 -0
- {visqol_python-3.3.3 → visqol_python-3.3.5}/visqol_python.egg-info/top_level.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visqol-python
|
|
3
|
-
Version: 3.3.3
|
|
3
|
+
Version: 3.3.5
|
|
4
4
|
Summary: ViSQOL - Virtual Speech Quality Objective Listener (Pure Python)
|
|
5
|
-
Home-page: https://github.com/talker93/visqol-python
|
|
6
5
|
Author: Shan Jiang
|
|
7
6
|
License-Expression: Apache-2.0
|
|
8
7
|
Project-URL: Homepage, https://github.com/talker93/visqol-python
|
|
8
|
+
Project-URL: Changelog, https://github.com/talker93/visqol-python/blob/main/CHANGELOG.md
|
|
9
9
|
Project-URL: Bug Reports, https://github.com/talker93/visqol-python/issues
|
|
10
10
|
Project-URL: Source, https://github.com/talker93/visqol-python
|
|
11
11
|
Project-URL: Original C++, https://github.com/google/visqol
|
|
@@ -14,7 +14,6 @@ Classifier: Development Status :: 4 - Beta
|
|
|
14
14
|
Classifier: Intended Audience :: Developers
|
|
15
15
|
Classifier: Intended Audience :: Science/Research
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.9
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -22,19 +21,24 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
22
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
23
22
|
Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
|
|
24
23
|
Classifier: Topic :: Scientific/Engineering
|
|
25
|
-
Requires-Python: >=3.8
|
|
24
|
+
Requires-Python: >=3.9
|
|
26
25
|
Description-Content-Type: text/markdown
|
|
27
26
|
License-File: LICENSE
|
|
28
27
|
Requires-Dist: numpy>=1.20
|
|
29
28
|
Requires-Dist: scipy>=1.7
|
|
30
29
|
Requires-Dist: soundfile>=0.10
|
|
31
30
|
Requires-Dist: libsvm-official>=3.25
|
|
32
|
-
|
|
31
|
+
Provides-Extra: test
|
|
32
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
33
33
|
Dynamic: license-file
|
|
34
|
-
Dynamic: requires-python
|
|
35
34
|
|
|
36
35
|
# ViSQOL (Python)
|
|
37
36
|
|
|
37
|
+
[](https://pypi.org/project/visqol-python/)
|
|
38
|
+
[](https://github.com/talker93/visqol-python/actions/workflows/ci.yml)
|
|
39
|
+
[](https://pypi.org/project/visqol-python/)
|
|
40
|
+
[](LICENSE)
|
|
41
|
+
|
|
38
42
|
A pure Python implementation of [Google's ViSQOL](https://github.com/google/visqol) (Virtual Speech Quality Objective Listener) v3.3.3 for objective audio/speech quality assessment.
|
|
39
43
|
|
|
40
44
|
ViSQOL compares a reference audio signal with a degraded version and outputs a **MOS-LQO** (Mean Opinion Score - Listening Quality Objective) score on a scale of **1.0 – 5.0**.
|
|
@@ -52,10 +56,10 @@ ViSQOL compares a reference audio signal with a degraded version and outputs a *
|
|
|
52
56
|
## Installation
|
|
53
57
|
|
|
54
58
|
```bash
|
|
55
|
-
pip install
|
|
59
|
+
pip install visqol-python
|
|
56
60
|
```
|
|
57
61
|
|
|
58
|
-
Or install
|
|
62
|
+
Or install from source:
|
|
59
63
|
|
|
60
64
|
```bash
|
|
61
65
|
git clone https://github.com/talker93/visqol-python.git
|
|
@@ -167,8 +171,8 @@ Measured on Apple M-series, Python 3.13:
|
|
|
167
171
|
```
|
|
168
172
|
visqol-python/
|
|
169
173
|
├── visqol/ # Main package
|
|
170
|
-
│ ├── __init__.py # Package exports
|
|
171
|
-
│ ├── api.py # Public API
|
|
174
|
+
│ ├── __init__.py # Package exports & version
|
|
175
|
+
│ ├── api.py # Public API (VisqolApi)
|
|
172
176
|
│ ├── visqol_manager.py # Pipeline orchestrator
|
|
173
177
|
│ ├── visqol_core.py # Core algorithm
|
|
174
178
|
│ ├── audio_utils.py # Audio I/O & SPL normalization
|
|
@@ -180,14 +184,18 @@ visqol-python/
|
|
|
180
184
|
│ ├── alignment.py # Global alignment via cross-correlation
|
|
181
185
|
│ ├── nsim.py # NSIM similarity metric
|
|
182
186
|
│ ├── quality_mapper.py # SVR & exponential quality mapping
|
|
183
|
-
│
|
|
184
|
-
|
|
185
|
-
│
|
|
186
|
-
├── tests/ #
|
|
187
|
-
│ ├──
|
|
188
|
-
│
|
|
189
|
-
|
|
190
|
-
├──
|
|
187
|
+
│ ├── __main__.py # CLI entry point
|
|
188
|
+
│ └── model/ # Bundled SVR model
|
|
189
|
+
│ └── libsvm_nu_svr_model.txt
|
|
190
|
+
├── tests/ # Tests (pytest)
|
|
191
|
+
│ ├── conftest.py # Shared fixtures & CLI options
|
|
192
|
+
│ ├── test_quick.py # Smoke tests (no external data needed)
|
|
193
|
+
│ └── test_conformance.py # Full conformance tests (needs testdata)
|
|
194
|
+
├── .github/workflows/
|
|
195
|
+
│ ├── ci.yml # CI: test on Python 3.9–3.13
|
|
196
|
+
│ └── publish.yml # Auto-publish to PyPI on tag push
|
|
197
|
+
├── pyproject.toml # Package metadata & build config
|
|
198
|
+
├── CHANGELOG.md
|
|
191
199
|
├── LICENSE
|
|
192
200
|
└── README.md
|
|
193
201
|
```
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# ViSQOL (Python)
|
|
2
2
|
|
|
3
|
+
[](https://pypi.org/project/visqol-python/)
|
|
4
|
+
[](https://github.com/talker93/visqol-python/actions/workflows/ci.yml)
|
|
5
|
+
[](https://pypi.org/project/visqol-python/)
|
|
6
|
+
[](LICENSE)
|
|
7
|
+
|
|
3
8
|
A pure Python implementation of [Google's ViSQOL](https://github.com/google/visqol) (Virtual Speech Quality Objective Listener) v3.3.3 for objective audio/speech quality assessment.
|
|
4
9
|
|
|
5
10
|
ViSQOL compares a reference audio signal with a degraded version and outputs a **MOS-LQO** (Mean Opinion Score - Listening Quality Objective) score on a scale of **1.0 – 5.0**.
|
|
@@ -17,10 +22,10 @@ ViSQOL compares a reference audio signal with a degraded version and outputs a *
|
|
|
17
22
|
## Installation
|
|
18
23
|
|
|
19
24
|
```bash
|
|
20
|
-
pip install
|
|
25
|
+
pip install visqol-python
|
|
21
26
|
```
|
|
22
27
|
|
|
23
|
-
Or install
|
|
28
|
+
Or install from source:
|
|
24
29
|
|
|
25
30
|
```bash
|
|
26
31
|
git clone https://github.com/talker93/visqol-python.git
|
|
@@ -132,8 +137,8 @@ Measured on Apple M-series, Python 3.13:
|
|
|
132
137
|
```
|
|
133
138
|
visqol-python/
|
|
134
139
|
├── visqol/ # Main package
|
|
135
|
-
│ ├── __init__.py # Package exports
|
|
136
|
-
│ ├── api.py # Public API
|
|
140
|
+
│ ├── __init__.py # Package exports & version
|
|
141
|
+
│ ├── api.py # Public API (VisqolApi)
|
|
137
142
|
│ ├── visqol_manager.py # Pipeline orchestrator
|
|
138
143
|
│ ├── visqol_core.py # Core algorithm
|
|
139
144
|
│ ├── audio_utils.py # Audio I/O & SPL normalization
|
|
@@ -145,14 +150,18 @@ visqol-python/
|
|
|
145
150
|
│ ├── alignment.py # Global alignment via cross-correlation
|
|
146
151
|
│ ├── nsim.py # NSIM similarity metric
|
|
147
152
|
│ ├── quality_mapper.py # SVR & exponential quality mapping
|
|
148
|
-
│
|
|
149
|
-
|
|
150
|
-
│
|
|
151
|
-
├── tests/ #
|
|
152
|
-
│ ├──
|
|
153
|
-
│
|
|
154
|
-
|
|
155
|
-
├──
|
|
153
|
+
│ ├── __main__.py # CLI entry point
|
|
154
|
+
│ └── model/ # Bundled SVR model
|
|
155
|
+
│ └── libsvm_nu_svr_model.txt
|
|
156
|
+
├── tests/ # Tests (pytest)
|
|
157
|
+
│ ├── conftest.py # Shared fixtures & CLI options
|
|
158
|
+
│ ├── test_quick.py # Smoke tests (no external data needed)
|
|
159
|
+
│ └── test_conformance.py # Full conformance tests (needs testdata)
|
|
160
|
+
├── .github/workflows/
|
|
161
|
+
│ ├── ci.yml # CI: test on Python 3.9–3.13
|
|
162
|
+
│ └── publish.yml # Auto-publish to PyPI on tag push
|
|
163
|
+
├── pyproject.toml # Package metadata & build config
|
|
164
|
+
├── CHANGELOG.md
|
|
156
165
|
├── LICENSE
|
|
157
166
|
└── README.md
|
|
158
167
|
```
|
|
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "visqol-python"
|
|
7
|
-
|
|
7
|
+
dynamic = ["version"]
|
|
8
8
|
description = "ViSQOL - Virtual Speech Quality Objective Listener (Pure Python)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
11
|
-
requires-python = ">=3.8"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
12
|
authors = [
|
|
13
13
|
{name = "Shan Jiang"},
|
|
14
14
|
]
|
|
@@ -21,7 +21,6 @@ classifiers = [
|
|
|
21
21
|
"Intended Audience :: Developers",
|
|
22
22
|
"Intended Audience :: Science/Research",
|
|
23
23
|
"Programming Language :: Python :: 3",
|
|
24
|
-
"Programming Language :: Python :: 3.8",
|
|
25
24
|
"Programming Language :: Python :: 3.9",
|
|
26
25
|
"Programming Language :: Python :: 3.10",
|
|
27
26
|
"Programming Language :: Python :: 3.11",
|
|
@@ -37,8 +36,12 @@ dependencies = [
|
|
|
37
36
|
"libsvm-official>=3.25",
|
|
38
37
|
]
|
|
39
38
|
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
test = ["pytest>=7.0"]
|
|
41
|
+
|
|
40
42
|
[project.urls]
|
|
41
43
|
Homepage = "https://github.com/talker93/visqol-python"
|
|
44
|
+
Changelog = "https://github.com/talker93/visqol-python/blob/main/CHANGELOG.md"
|
|
42
45
|
"Bug Reports" = "https://github.com/talker93/visqol-python/issues"
|
|
43
46
|
Source = "https://github.com/talker93/visqol-python"
|
|
44
47
|
"Original C++" = "https://github.com/google/visqol"
|
|
@@ -46,8 +49,19 @@ Source = "https://github.com/talker93/visqol-python"
|
|
|
46
49
|
[project.scripts]
|
|
47
50
|
visqol = "visqol.__main__:main"
|
|
48
51
|
|
|
52
|
+
[tool.setuptools.dynamic]
|
|
53
|
+
version = {attr = "visqol.__version__"}
|
|
54
|
+
|
|
49
55
|
[tool.setuptools.packages.find]
|
|
50
56
|
exclude = ["tests*"]
|
|
51
57
|
|
|
52
58
|
[tool.setuptools.package-data]
|
|
53
|
-
visqol = ["model/*.txt"]
|
|
59
|
+
visqol = ["model/*.txt", "py.typed"]
|
|
60
|
+
|
|
61
|
+
[tool.pytest.ini_options]
|
|
62
|
+
testpaths = ["tests"]
|
|
63
|
+
|
|
64
|
+
[tool.mypy]
|
|
65
|
+
strict = true
|
|
66
|
+
warn_return_any = true
|
|
67
|
+
warn_unused_configs = true
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ViSQOL Python conformance tests.
|
|
3
|
+
|
|
4
|
+
Requires the official ViSQOL testdata directory. Provide it via:
|
|
5
|
+
pytest tests/test_conformance.py --testdata /path/to/visqol/testdata
|
|
6
|
+
|
|
7
|
+
The testdata directory should contain:
|
|
8
|
+
conformance_testdata_subset/ (audio test WAV files)
|
|
9
|
+
clean_speech/ (speech test WAV files)
|
|
10
|
+
|
|
11
|
+
You can obtain these from the official ViSQOL repository:
|
|
12
|
+
https://github.com/google/visqol
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
import pytest
|
|
18
|
+
|
|
19
|
+
from visqol import VisqolApi
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ── Fixtures ──
|
|
23
|
+
|
|
24
|
+
@pytest.fixture(scope="session")
|
|
25
|
+
def testdata_dir(request):
|
|
26
|
+
"""Resolve testdata directory from --testdata or auto-detect."""
|
|
27
|
+
td = request.config.getoption("--testdata")
|
|
28
|
+
if td and os.path.isdir(td):
|
|
29
|
+
return td
|
|
30
|
+
# Fallback: look relative to this file (when inside the original visqol repo)
|
|
31
|
+
candidate = os.path.join(os.path.dirname(__file__), "..", "..", "testdata")
|
|
32
|
+
if os.path.isdir(candidate):
|
|
33
|
+
return candidate
|
|
34
|
+
pytest.skip("testdata directory not found — use --testdata /path/to/visqol/testdata")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.fixture(scope="session")
|
|
38
|
+
def conf_dir(testdata_dir):
|
|
39
|
+
return os.path.join(testdata_dir, "conformance_testdata_subset")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.fixture(scope="session")
|
|
43
|
+
def speech_dir(testdata_dir):
|
|
44
|
+
return os.path.join(testdata_dir, "clean_speech")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.fixture(scope="session")
|
|
48
|
+
def audio_api():
|
|
49
|
+
api = VisqolApi()
|
|
50
|
+
api.create(mode="audio")
|
|
51
|
+
return api
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@pytest.fixture(scope="session")
|
|
55
|
+
def speech_api():
|
|
56
|
+
api = VisqolApi()
|
|
57
|
+
api.create(mode="speech")
|
|
58
|
+
return api
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ── Test data ──
|
|
62
|
+
|
|
63
|
+
TOLERANCE = 0.05
|
|
64
|
+
|
|
65
|
+
AUDIO_CASES = [
|
|
66
|
+
("strauss48_stereo.wav", "strauss48_stereo_lp35.wav",
|
|
67
|
+
1.3888791489130758, "strauss_lp35"),
|
|
68
|
+
("steely48_stereo.wav", "steely48_stereo_lp7.wav",
|
|
69
|
+
2.2501683734385183, "steely_lp7"),
|
|
70
|
+
("sopr48_stereo.wav", "sopr48_stereo_256kbps_aac.wav",
|
|
71
|
+
4.68228969737946, "sopr_256aac"),
|
|
72
|
+
("ravel48_stereo.wav", "ravel48_stereo_128kbps_opus.wav",
|
|
73
|
+
4.465141897255348, "ravel_128opus"),
|
|
74
|
+
("moonlight48_stereo.wav", "moonlight48_stereo_128kbps_aac.wav",
|
|
75
|
+
4.684292801646114, "moonlight_128aac"),
|
|
76
|
+
("harpsichord48_stereo.wav", "harpsichord48_stereo_96kbps_mp3.wav",
|
|
77
|
+
4.22374532766003, "harpsichord_96mp3"),
|
|
78
|
+
("guitar48_stereo.wav", "guitar48_stereo_64kbps_aac.wav",
|
|
79
|
+
4.349722308064298, "guitar_64aac"),
|
|
80
|
+
("glock48_stereo.wav", "glock48_stereo_48kbps_aac.wav",
|
|
81
|
+
4.332452943882108, "glock_48aac"),
|
|
82
|
+
("contrabassoon48_stereo.wav", "contrabassoon48_stereo_24kbps_aac.wav",
|
|
83
|
+
2.346868205375293, "contrabassoon_24aac"),
|
|
84
|
+
("castanets48_stereo.wav", "castanets48_stereo.wav",
|
|
85
|
+
4.732101253042348, "castanets_identity"),
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
SPEECH_CASES = [
|
|
89
|
+
("CA01_01.wav", "transcoded_CA01_01.wav",
|
|
90
|
+
3.374505555111911, "CA01_transcoded"),
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ── Audio mode tests ──
|
|
95
|
+
|
|
96
|
+
@pytest.mark.parametrize(
|
|
97
|
+
"ref_name, deg_name, expected_mos, test_id",
|
|
98
|
+
AUDIO_CASES,
|
|
99
|
+
ids=[c[3] for c in AUDIO_CASES],
|
|
100
|
+
)
|
|
101
|
+
def test_audio_conformance(audio_api, conf_dir, ref_name, deg_name, expected_mos, test_id):
|
|
102
|
+
ref_path = os.path.join(conf_dir, ref_name)
|
|
103
|
+
deg_path = os.path.join(conf_dir, deg_name)
|
|
104
|
+
result = audio_api.measure(ref_path, deg_path)
|
|
105
|
+
diff = abs(result.moslqo - expected_mos)
|
|
106
|
+
assert diff < TOLERANCE, (
|
|
107
|
+
f"[{test_id}] MOS={result.moslqo:.6f}, expected={expected_mos:.6f}, diff={diff:.6f}"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ── Speech mode tests ──
|
|
112
|
+
|
|
113
|
+
@pytest.mark.parametrize(
|
|
114
|
+
"ref_name, deg_name, expected_mos, test_id",
|
|
115
|
+
SPEECH_CASES,
|
|
116
|
+
ids=[c[3] for c in SPEECH_CASES],
|
|
117
|
+
)
|
|
118
|
+
def test_speech_conformance(speech_api, speech_dir, ref_name, deg_name, expected_mos, test_id):
|
|
119
|
+
ref_path = os.path.join(speech_dir, ref_name)
|
|
120
|
+
deg_path = os.path.join(speech_dir, deg_name)
|
|
121
|
+
result = speech_api.measure(ref_path, deg_path)
|
|
122
|
+
diff = abs(result.moslqo - expected_mos)
|
|
123
|
+
assert diff < TOLERANCE, (
|
|
124
|
+
f"[{test_id}] MOS={result.moslqo:.6f}, expected={expected_mos:.6f}, diff={diff:.6f}"
|
|
125
|
+
)
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quick smoke tests for ViSQOL Python.
|
|
3
|
+
|
|
4
|
+
These tests verify basic API functionality without requiring external testdata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
from visqol import VisqolApi, SimilarityResult, AudioSignal
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── API creation ──
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TestApiCreation:
|
|
17
|
+
"""Test that VisqolApi can be created in different modes."""
|
|
18
|
+
|
|
19
|
+
def test_create_audio_mode(self):
|
|
20
|
+
api = VisqolApi()
|
|
21
|
+
api.create(mode="audio")
|
|
22
|
+
|
|
23
|
+
def test_create_speech_mode(self):
|
|
24
|
+
api = VisqolApi()
|
|
25
|
+
api.create(mode="speech")
|
|
26
|
+
|
|
27
|
+
def test_create_default_mode(self):
|
|
28
|
+
"""Default mode (no argument) should work as audio mode."""
|
|
29
|
+
api = VisqolApi()
|
|
30
|
+
api.create()
|
|
31
|
+
|
|
32
|
+
def test_create_case_insensitive(self):
|
|
33
|
+
api = VisqolApi()
|
|
34
|
+
api.create(mode="SPEECH")
|
|
35
|
+
|
|
36
|
+
def test_create_invalid_mode_raises(self):
|
|
37
|
+
api = VisqolApi()
|
|
38
|
+
with pytest.raises(ValueError, match="Invalid mode"):
|
|
39
|
+
api.create(mode="invalid")
|
|
40
|
+
|
|
41
|
+
def test_create_negative_search_window_raises(self):
|
|
42
|
+
api = VisqolApi()
|
|
43
|
+
with pytest.raises(ValueError, match="search_window"):
|
|
44
|
+
api.create(search_window=-1)
|
|
45
|
+
|
|
46
|
+
def test_create_missing_model_raises(self):
|
|
47
|
+
api = VisqolApi()
|
|
48
|
+
with pytest.raises(FileNotFoundError):
|
|
49
|
+
api.create(mode="audio", model_path="/nonexistent/model.txt")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ── Measure guards ──
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TestMeasureGuards:
|
|
56
|
+
"""Test that measure() raises helpful errors for bad inputs."""
|
|
57
|
+
|
|
58
|
+
def test_measure_before_create_raises(self):
|
|
59
|
+
api = VisqolApi()
|
|
60
|
+
with pytest.raises(RuntimeError, match="create"):
|
|
61
|
+
api.measure("a.wav", "b.wav")
|
|
62
|
+
|
|
63
|
+
def test_measure_nonexistent_ref_raises(self):
|
|
64
|
+
api = VisqolApi()
|
|
65
|
+
api.create(mode="speech")
|
|
66
|
+
with pytest.raises(FileNotFoundError, match="Reference"):
|
|
67
|
+
api.measure("/nonexistent/ref.wav", "/nonexistent/deg.wav")
|
|
68
|
+
|
|
69
|
+
def test_measure_from_arrays_before_create_raises(self):
|
|
70
|
+
api = VisqolApi()
|
|
71
|
+
with pytest.raises(RuntimeError, match="create"):
|
|
72
|
+
api.measure_from_arrays(np.zeros(100), np.zeros(100), 16000)
|
|
73
|
+
|
|
74
|
+
def test_measure_from_arrays_bad_type_raises(self):
|
|
75
|
+
api = VisqolApi()
|
|
76
|
+
api.create(mode="speech")
|
|
77
|
+
with pytest.raises(TypeError, match="numpy array"):
|
|
78
|
+
api.measure_from_arrays([1, 2, 3], np.zeros(100), 16000) # type: ignore[arg-type]
|
|
79
|
+
|
|
80
|
+
def test_measure_from_arrays_empty_raises(self):
|
|
81
|
+
api = VisqolApi()
|
|
82
|
+
api.create(mode="speech")
|
|
83
|
+
with pytest.raises(ValueError, match="empty"):
|
|
84
|
+
api.measure_from_arrays(np.array([]), np.zeros(100), 16000)
|
|
85
|
+
|
|
86
|
+
def test_measure_from_arrays_bad_sr_raises(self):
|
|
87
|
+
api = VisqolApi()
|
|
88
|
+
api.create(mode="speech")
|
|
89
|
+
with pytest.raises(ValueError, match="sample_rate"):
|
|
90
|
+
api.measure_from_arrays(np.zeros(100), np.zeros(100), 0)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── measure_from_arrays ──
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TestMeasureFromArrays:
|
|
97
|
+
"""Test measure_from_arrays with synthetic signals."""
|
|
98
|
+
|
|
99
|
+
def test_identical_signal_high_score(self):
|
|
100
|
+
"""Identical signals should produce a high MOS score."""
|
|
101
|
+
api = VisqolApi()
|
|
102
|
+
api.create(mode="speech")
|
|
103
|
+
sr = 16000
|
|
104
|
+
duration = 3.0
|
|
105
|
+
t = np.linspace(0, duration, int(sr * duration), endpoint=False)
|
|
106
|
+
signal = 0.5 * np.sin(2 * np.pi * 440 * t)
|
|
107
|
+
result = api.measure_from_arrays(signal, signal, sample_rate=sr)
|
|
108
|
+
assert result.moslqo >= 4.0, (
|
|
109
|
+
f"Identical signal should give MOS >= 4.0, got {result.moslqo:.4f}"
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
def test_degraded_signal_lower_score(self):
|
|
113
|
+
"""Adding noise to a signal should produce a lower MOS score."""
|
|
114
|
+
api = VisqolApi()
|
|
115
|
+
api.create(mode="speech")
|
|
116
|
+
sr = 16000
|
|
117
|
+
duration = 3.0
|
|
118
|
+
t = np.linspace(0, duration, int(sr * duration), endpoint=False)
|
|
119
|
+
ref = 0.5 * np.sin(2 * np.pi * 440 * t)
|
|
120
|
+
rng = np.random.default_rng(42)
|
|
121
|
+
deg = ref + 0.3 * rng.standard_normal(len(ref))
|
|
122
|
+
result = api.measure_from_arrays(ref, deg, sample_rate=sr)
|
|
123
|
+
assert 1.0 <= result.moslqo <= 5.0, (
|
|
124
|
+
f"MOS should be in [1, 5], got {result.moslqo:.4f}"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ── Result fields ──
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class TestResultFields:
|
|
132
|
+
"""Test that SimilarityResult has all expected fields."""
|
|
133
|
+
|
|
134
|
+
def test_result_has_expected_fields(self):
|
|
135
|
+
api = VisqolApi()
|
|
136
|
+
api.create(mode="speech")
|
|
137
|
+
sr = 16000
|
|
138
|
+
duration = 3.0
|
|
139
|
+
t = np.linspace(0, duration, int(sr * duration), endpoint=False)
|
|
140
|
+
signal = 0.5 * np.sin(2 * np.pi * 440 * t)
|
|
141
|
+
result = api.measure_from_arrays(signal, signal, sample_rate=sr)
|
|
142
|
+
assert hasattr(result, "moslqo")
|
|
143
|
+
assert hasattr(result, "vnsim")
|
|
144
|
+
assert hasattr(result, "fvnsim")
|
|
145
|
+
assert hasattr(result, "fstdnsim")
|
|
146
|
+
assert hasattr(result, "fvdegenergy")
|
|
147
|
+
assert hasattr(result, "patch_sims")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# ── Package metadata ──
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class TestVersion:
|
|
154
|
+
"""Test package version is accessible."""
|
|
155
|
+
|
|
156
|
+
def test_version_string(self):
|
|
157
|
+
import visqol
|
|
158
|
+
assert hasattr(visqol, "__version__")
|
|
159
|
+
assert isinstance(visqol.__version__, str)
|
|
160
|
+
parts = visqol.__version__.split(".")
|
|
161
|
+
assert len(parts) >= 2, "Version should have at least major.minor"
|
|
162
|
+
|
|
163
|
+
def test_public_exports(self):
|
|
164
|
+
"""Package should export key classes."""
|
|
165
|
+
import visqol
|
|
166
|
+
assert hasattr(visqol, "VisqolApi")
|
|
167
|
+
assert hasattr(visqol, "SimilarityResult")
|
|
168
|
+
assert hasattr(visqol, "AudioSignal")
|
|
@@ -13,8 +13,10 @@ Usage:
|
|
|
13
13
|
print(f"MOS-LQO: {result.moslqo}")
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
__version__ = "3.3.3"
|
|
16
|
+
__version__: str = "3.3.5"
|
|
17
17
|
|
|
18
18
|
from visqol.api import VisqolApi
|
|
19
|
+
from visqol.visqol_core import SimilarityResult
|
|
20
|
+
from visqol.audio_utils import AudioSignal
|
|
19
21
|
|
|
20
|
-
__all__ = ["VisqolApi"]
|
|
22
|
+
__all__: list[str] = ["VisqolApi", "SimilarityResult", "AudioSignal"]
|
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
2
|
ViSQOL command-line interface.
|
|
3
3
|
|
|
4
|
-
Usage
|
|
4
|
+
Usage::
|
|
5
|
+
|
|
5
6
|
python -m visqol --reference ref.wav --degraded deg.wav [--speech_mode]
|
|
6
7
|
"""
|
|
7
8
|
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
8
11
|
import argparse
|
|
9
12
|
import sys
|
|
10
13
|
import logging
|
|
@@ -12,45 +15,46 @@ import logging
|
|
|
12
15
|
from visqol.api import VisqolApi
|
|
13
16
|
|
|
14
17
|
|
|
15
|
-
def main():
|
|
18
|
+
def main() -> None:
|
|
19
|
+
"""Entry point for the ``visqol`` CLI."""
|
|
16
20
|
parser = argparse.ArgumentParser(
|
|
17
|
-
description="ViSQOL - Virtual Speech Quality Objective Listener (Python)"
|
|
21
|
+
description="ViSQOL - Virtual Speech Quality Objective Listener (Python)",
|
|
18
22
|
)
|
|
19
23
|
parser.add_argument(
|
|
20
24
|
"--reference", "-r", required=True,
|
|
21
|
-
help="Path to reference audio file (WAV)"
|
|
25
|
+
help="Path to reference audio file (WAV)",
|
|
22
26
|
)
|
|
23
27
|
parser.add_argument(
|
|
24
28
|
"--degraded", "-d", required=True,
|
|
25
|
-
help="Path to degraded audio file (WAV)"
|
|
29
|
+
help="Path to degraded audio file (WAV)",
|
|
26
30
|
)
|
|
27
31
|
parser.add_argument(
|
|
28
32
|
"--speech_mode", action="store_true",
|
|
29
|
-
help="Use speech mode (
|
|
33
|
+
help="Use speech mode (16 kHz, exponential mapping)",
|
|
30
34
|
)
|
|
31
35
|
parser.add_argument(
|
|
32
36
|
"--model", default=None,
|
|
33
|
-
help="Path to SVR model file (Audio mode only)"
|
|
37
|
+
help="Path to SVR model file (Audio mode only)",
|
|
34
38
|
)
|
|
35
39
|
parser.add_argument(
|
|
36
40
|
"--search_window", type=int, default=60,
|
|
37
|
-
help="Search window radius (default: 60)"
|
|
41
|
+
help="Search window radius (default: 60)",
|
|
38
42
|
)
|
|
39
43
|
parser.add_argument(
|
|
40
44
|
"--unscaled_speech", action="store_true",
|
|
41
|
-
help="Don't scale speech MOS to max 5.0"
|
|
45
|
+
help="Don't scale speech MOS to max 5.0",
|
|
42
46
|
)
|
|
43
47
|
parser.add_argument(
|
|
44
48
|
"--no_alignment", action="store_true",
|
|
45
|
-
help="Disable global alignment"
|
|
49
|
+
help="Disable global alignment",
|
|
46
50
|
)
|
|
47
51
|
parser.add_argument(
|
|
48
52
|
"--no_realignment", action="store_true",
|
|
49
|
-
help="Disable fine realignment"
|
|
53
|
+
help="Disable fine realignment",
|
|
50
54
|
)
|
|
51
55
|
parser.add_argument(
|
|
52
56
|
"--verbose", "-v", action="store_true",
|
|
53
|
-
help="Enable verbose output"
|
|
57
|
+
help="Enable verbose output",
|
|
54
58
|
)
|
|
55
59
|
|
|
56
60
|
args = parser.parse_args()
|
|
@@ -60,18 +64,30 @@ def main():
|
|
|
60
64
|
logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
|
|
61
65
|
|
|
62
66
|
# Run ViSQOL
|
|
63
|
-
mode = "speech" if args.speech_mode else "audio"
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
mode: str = "speech" if args.speech_mode else "audio"
|
|
68
|
+
|
|
69
|
+
try:
|
|
70
|
+
api = VisqolApi()
|
|
71
|
+
api.create(
|
|
72
|
+
mode=mode,
|
|
73
|
+
model_path=args.model,
|
|
74
|
+
search_window=args.search_window,
|
|
75
|
+
use_unscaled_speech=args.unscaled_speech,
|
|
76
|
+
disable_global_alignment=args.no_alignment,
|
|
77
|
+
disable_realignment=args.no_realignment,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
result = api.measure(args.reference, args.degraded)
|
|
73
81
|
|
|
74
|
-
|
|
82
|
+
except FileNotFoundError as exc:
|
|
83
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
84
|
+
sys.exit(1)
|
|
85
|
+
except ValueError as exc:
|
|
86
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
87
|
+
sys.exit(1)
|
|
88
|
+
except RuntimeError as exc:
|
|
89
|
+
print(f"Error: {exc}", file=sys.stderr)
|
|
90
|
+
sys.exit(1)
|
|
75
91
|
|
|
76
92
|
# Output results
|
|
77
93
|
print(f"MOS-LQO: {result.moslqo:.6f}")
|
|
@@ -83,9 +99,11 @@ def main():
|
|
|
83
99
|
print(f"FVDEGENERGY: {result.fvdegenergy}")
|
|
84
100
|
print(f"Patches: {len(result.patch_sims)}")
|
|
85
101
|
for i, p in enumerate(result.patch_sims):
|
|
86
|
-
print(
|
|
87
|
-
|
|
88
|
-
|
|
102
|
+
print(
|
|
103
|
+
f" Patch {i}: sim={p.similarity:.4f} "
|
|
104
|
+
f"ref=[{p.ref_patch_start_time:.3f}-{p.ref_patch_end_time:.3f}] "
|
|
105
|
+
f"deg=[{p.deg_patch_start_time:.3f}-{p.deg_patch_end_time:.3f}]"
|
|
106
|
+
)
|
|
89
107
|
|
|
90
108
|
|
|
91
109
|
if __name__ == "__main__":
|