visqol-python 3.3.5__tar.gz → 3.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visqol_python-3.3.6/CHANGELOG.md +55 -0
- visqol_python-3.3.6/CONTRIBUTING.md +77 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/MANIFEST.in +2 -1
- {visqol_python-3.3.5/visqol_python.egg-info → visqol_python-3.3.6}/PKG-INFO +32 -1
- {visqol_python-3.3.5 → visqol_python-3.3.6}/README.md +27 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/pyproject.toml +29 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/tests/test_quick.py +94 -1
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/__init__.py +11 -4
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/__main__.py +24 -19
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/api.py +59 -9
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/audio_utils.py +10 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/gammatone.py +38 -56
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/nsim.py +13 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/visqol_core.py +10 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6/visqol_python.egg-info}/PKG-INFO +32 -1
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol_python.egg-info/SOURCES.txt +2 -1
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol_python.egg-info/requires.txt +5 -0
- visqol_python-3.3.5/requirements.txt +0 -4
- {visqol_python-3.3.5 → visqol_python-3.3.6}/LICENSE +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/setup.cfg +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/tests/test_conformance.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/alignment.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/analysis_window.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/model/libsvm_nu_svr_model.txt +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/patch_creator.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/patch_selector.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/py.typed +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/quality_mapper.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/signal_utils.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol/visqol_manager.py +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol_python.egg-info/dependency_links.txt +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol_python.egg-info/entry_points.txt +0 -0
- {visqol_python-3.3.5 → visqol_python-3.3.6}/visqol_python.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/), and this project adheres to [Semantic Versioning](https://semver.org/).
|
|
6
|
+
|
|
7
|
+
## [3.3.5] - 2026-03-23
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- **Type hints** on all public and internal APIs (`from __future__ import annotations`)
|
|
11
|
+
- **`py.typed`** marker (PEP 561) — mypy / pyright can now type-check dependents
|
|
12
|
+
- **CONTRIBUTING.md** with development setup, code style, and PR guidelines
|
|
13
|
+
- Exported `SimilarityResult` and `AudioSignal` from top-level `visqol` package
|
|
14
|
+
- `mypy` configuration in `pyproject.toml`
|
|
15
|
+
|
|
16
|
+
### Improved
|
|
17
|
+
- **Error handling**: friendly `ValueError` / `FileNotFoundError` / `TypeError` throughout:
|
|
18
|
+
- `VisqolApi.create()` now validates mode, search_window, and model_path
|
|
19
|
+
- `VisqolApi.measure()` checks file existence before processing
|
|
20
|
+
- `VisqolApi.measure_from_arrays()` validates array types, emptiness, and sample rate
|
|
21
|
+
- `AudioSignal` validates sample rate on construction
|
|
22
|
+
- `AnalysisWindow` validates sample_rate and overlap range
|
|
23
|
+
- CLI now catches exceptions and prints user-friendly error messages
|
|
24
|
+
- `AnalysisWindow.apply_hann_window()` uses `ValueError` instead of bare `assert`
|
|
25
|
+
|
|
26
|
+
## [3.3.4] - 2026-03-23
|
|
27
|
+
|
|
28
|
+
### Improved
|
|
29
|
+
- Tests rewritten in **pytest** format with `parametrize` and fixtures
|
|
30
|
+
- Added **CI workflow** (GitHub Actions): auto-test on Python 3.9–3.13 for every push/PR
|
|
31
|
+
- Added **smoke tests** (`test_quick.py`) that run without external testdata
|
|
32
|
+
- Version number now managed in a single place (`visqol/__init__.py`)
|
|
33
|
+
- Removed redundant `setup.py` — `pyproject.toml` is the single source of truth
|
|
34
|
+
- Added this CHANGELOG
|
|
35
|
+
- README: added PyPI / CI / License badges
|
|
36
|
+
|
|
37
|
+
### Fixed
|
|
38
|
+
- `requires-python` updated from `>=3.8` to `>=3.9` (numpy/scipy dropped 3.8 support)
|
|
39
|
+
|
|
40
|
+
## [3.3.3] - 2026-03-23
|
|
41
|
+
|
|
42
|
+
### Added
|
|
43
|
+
- Initial PyPI release as `visqol-python`
|
|
44
|
+
- Pure Python port of [Google's ViSQOL v3.3.3](https://github.com/google/visqol)
|
|
45
|
+
- **Audio mode** (48 kHz, SVR quality mapping) — 10/10 conformance tests pass
|
|
46
|
+
- **Speech mode** (16 kHz, exponential polynomial mapping) — 1/1 conformance test passes
|
|
47
|
+
- Python API: `VisqolApi.measure()` and `VisqolApi.measure_from_arrays()`
|
|
48
|
+
- CLI: `python -m visqol` / `visqol` command
|
|
49
|
+
- Bundled SVR model (`libsvm_nu_svr_model.txt`)
|
|
50
|
+
- GitHub Actions workflow for auto-publish to PyPI via Trusted Publisher
|
|
51
|
+
|
|
52
|
+
[3.3.6]: https://github.com/talker93/visqol-python/compare/v3.3.5...v3.3.6
|
|
53
|
+
[3.3.5]: https://github.com/talker93/visqol-python/compare/v3.3.4...v3.3.5
|
|
54
|
+
[3.3.4]: https://github.com/talker93/visqol-python/compare/v3.3.3...v3.3.4
|
|
55
|
+
[3.3.3]: https://github.com/talker93/visqol-python/releases/tag/v3.3.3
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Contributing to ViSQOL (Python)
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing! This document provides guidelines and instructions for contributing to this project.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
1. **Fork** the repository on GitHub
|
|
8
|
+
2. **Clone** your fork locally:
|
|
9
|
+
```bash
|
|
10
|
+
git clone https://github.com/<your-username>/visqol-python.git
|
|
11
|
+
cd visqol-python
|
|
12
|
+
```
|
|
13
|
+
3. **Create a branch** for your changes:
|
|
14
|
+
```bash
|
|
15
|
+
git checkout -b feature/my-improvement
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Development Setup
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Create a virtual environment
|
|
22
|
+
python -m venv venv
|
|
23
|
+
source venv/bin/activate # macOS/Linux
|
|
24
|
+
# venv\Scripts\activate # Windows
|
|
25
|
+
|
|
26
|
+
# Install in development mode with test dependencies
|
|
27
|
+
pip install -e ".[test]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Running Tests
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Quick smoke tests (no external data needed)
|
|
34
|
+
pytest tests/test_quick.py -v
|
|
35
|
+
|
|
36
|
+
# Full conformance tests (requires testdata directory)
|
|
37
|
+
pytest tests/test_conformance.py -v --testdata /path/to/visqol/testdata
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Code Style
|
|
41
|
+
|
|
42
|
+
- **Type hints**: All public functions and methods must include type annotations.
|
|
43
|
+
- **Docstrings**: Use Google-style docstrings for all public APIs.
|
|
44
|
+
- **Imports**: Use `from __future__ import annotations` at the top of every module.
|
|
45
|
+
- Keep line length ≤ 95 characters where practical (this matches the ruff `line-length` setting in `pyproject.toml`).
|
|
46
|
+
|
|
47
|
+
## Making Changes
|
|
48
|
+
|
|
49
|
+
1. Write clean, well-documented code with type hints.
|
|
50
|
+
2. Add or update tests for any new functionality.
|
|
51
|
+
3. Ensure all existing tests still pass.
|
|
52
|
+
4. Update `CHANGELOG.md` under an `[Unreleased]` section.
|
|
53
|
+
|
|
54
|
+
## Pull Request Process
|
|
55
|
+
|
|
56
|
+
1. Update the `CHANGELOG.md` with details of your changes.
|
|
57
|
+
2. Ensure all tests pass locally.
|
|
58
|
+
3. Submit a pull request with a clear description of the changes.
|
|
59
|
+
4. Link any relevant issues.
|
|
60
|
+
|
|
61
|
+
## Reporting Bugs
|
|
62
|
+
|
|
63
|
+
Please open an [issue](https://github.com/talker93/visqol-python/issues) with:
|
|
64
|
+
|
|
65
|
+
- A clear, descriptive title
|
|
66
|
+
- Steps to reproduce the problem
|
|
67
|
+
- Expected vs. actual behavior
|
|
68
|
+
- Python version and OS
|
|
69
|
+
- Relevant audio file details (sample rate, duration, format)
|
|
70
|
+
|
|
71
|
+
## Versioning
|
|
72
|
+
|
|
73
|
+
This project follows [Semantic Versioning](https://semver.org/). The single source of truth for the version number is `visqol/__init__.py`.
|
|
74
|
+
|
|
75
|
+
## License
|
|
76
|
+
|
|
77
|
+
By contributing, you agree that your contributions will be licensed under the [Apache License 2.0](LICENSE).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visqol-python
|
|
3
|
-
Version: 3.3.5
|
|
3
|
+
Version: 3.3.6
|
|
4
4
|
Summary: ViSQOL - Virtual Speech Quality Objective Listener (Pure Python)
|
|
5
5
|
Author: Shan Jiang
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -30,6 +30,10 @@ Requires-Dist: soundfile>=0.10
|
|
|
30
30
|
Requires-Dist: libsvm-official>=3.25
|
|
31
31
|
Provides-Extra: test
|
|
32
32
|
Requires-Dist: pytest>=7.0; extra == "test"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
36
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
33
37
|
Dynamic: license-file
|
|
34
38
|
|
|
35
39
|
# ViSQOL (Python)
|
|
@@ -103,6 +107,33 @@ result = api.measure_from_arrays(ref, deg, sample_rate=sr)
|
|
|
103
107
|
print(f"MOS-LQO: {result.moslqo:.4f}")
|
|
104
108
|
```
|
|
105
109
|
|
|
110
|
+
### Batch Evaluation
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from visqol import VisqolApi
|
|
114
|
+
|
|
115
|
+
api = VisqolApi()
|
|
116
|
+
api.create(mode="audio")
|
|
117
|
+
|
|
118
|
+
file_pairs = [
|
|
119
|
+
("ref1.wav", "deg1.wav"),
|
|
120
|
+
("ref2.wav", "deg2.wav"),
|
|
121
|
+
("ref3.wav", "deg3.wav"),
|
|
122
|
+
]
|
|
123
|
+
|
|
124
|
+
# Optional progress callback
|
|
125
|
+
results = api.measure_batch(
|
|
126
|
+
file_pairs,
|
|
127
|
+
progress_callback=lambda done, total: print(f"{done}/{total}"),
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
for pair, result in zip(file_pairs, results):
|
|
131
|
+
if isinstance(result, Exception):
|
|
132
|
+
print(f"{pair}: FAILED — {result}")
|
|
133
|
+
else:
|
|
134
|
+
print(f"{pair}: MOS-LQO = {result.moslqo:.4f}")
|
|
135
|
+
```
|
|
136
|
+
|
|
106
137
|
### Command Line
|
|
107
138
|
|
|
108
139
|
```bash
|
|
@@ -69,6 +69,33 @@ result = api.measure_from_arrays(ref, deg, sample_rate=sr)
|
|
|
69
69
|
print(f"MOS-LQO: {result.moslqo:.4f}")
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
+
### Batch Evaluation
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from visqol import VisqolApi
|
|
76
|
+
|
|
77
|
+
api = VisqolApi()
|
|
78
|
+
api.create(mode="audio")
|
|
79
|
+
|
|
80
|
+
file_pairs = [
|
|
81
|
+
("ref1.wav", "deg1.wav"),
|
|
82
|
+
("ref2.wav", "deg2.wav"),
|
|
83
|
+
("ref3.wav", "deg3.wav"),
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
# Optional progress callback
|
|
87
|
+
results = api.measure_batch(
|
|
88
|
+
file_pairs,
|
|
89
|
+
progress_callback=lambda done, total: print(f"{done}/{total}"),
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
for pair, result in zip(file_pairs, results):
|
|
93
|
+
if isinstance(result, Exception):
|
|
94
|
+
print(f"{pair}: FAILED — {result}")
|
|
95
|
+
else:
|
|
96
|
+
print(f"{pair}: MOS-LQO = {result.moslqo:.4f}")
|
|
97
|
+
```
|
|
98
|
+
|
|
72
99
|
### Command Line
|
|
73
100
|
|
|
74
101
|
```bash
|
|
@@ -38,6 +38,11 @@ dependencies = [
|
|
|
38
38
|
|
|
39
39
|
[project.optional-dependencies]
|
|
40
40
|
test = ["pytest>=7.0"]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=7.0",
|
|
43
|
+
"ruff>=0.4",
|
|
44
|
+
"mypy>=1.8",
|
|
45
|
+
]
|
|
41
46
|
|
|
42
47
|
[project.urls]
|
|
43
48
|
Homepage = "https://github.com/talker93/visqol-python"
|
|
@@ -61,7 +66,31 @@ visqol = ["model/*.txt", "py.typed"]
|
|
|
61
66
|
[tool.pytest.ini_options]
|
|
62
67
|
testpaths = ["tests"]
|
|
63
68
|
|
|
69
|
+
[tool.ruff]
|
|
70
|
+
target-version = "py39"
|
|
71
|
+
line-length = 95
|
|
72
|
+
|
|
73
|
+
[tool.ruff.lint]
|
|
74
|
+
select = [
|
|
75
|
+
"E", # pycodestyle errors
|
|
76
|
+
"W", # pycodestyle warnings
|
|
77
|
+
"F", # pyflakes
|
|
78
|
+
"I", # isort
|
|
79
|
+
"UP", # pyupgrade
|
|
80
|
+
"B", # flake8-bugbear
|
|
81
|
+
"SIM", # flake8-simplify
|
|
82
|
+
"RUF", # ruff-specific rules
|
|
83
|
+
]
|
|
84
|
+
ignore = ["E501"] # line length handled by formatter
|
|
85
|
+
|
|
86
|
+
[tool.ruff.lint.isort]
|
|
87
|
+
known-first-party = ["visqol"]
|
|
88
|
+
|
|
64
89
|
[tool.mypy]
|
|
65
90
|
strict = true
|
|
66
91
|
warn_return_any = true
|
|
67
92
|
warn_unused_configs = true
|
|
93
|
+
|
|
94
|
+
[[tool.mypy.overrides]]
|
|
95
|
+
module = ["svmutil.*", "libsvm.*", "soundfile.*"]
|
|
96
|
+
ignore_missing_imports = true
|
|
@@ -7,7 +7,13 @@ These tests verify basic API functionality without requiring external testdata.
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pytest
|
|
9
9
|
|
|
10
|
-
from visqol import
|
|
10
|
+
from visqol import (
|
|
11
|
+
AudioSignal,
|
|
12
|
+
PatchSimilarityResult,
|
|
13
|
+
ProgressCallback,
|
|
14
|
+
SimilarityResult,
|
|
15
|
+
VisqolApi,
|
|
16
|
+
)
|
|
11
17
|
|
|
12
18
|
|
|
13
19
|
# ── API creation ──
|
|
@@ -147,6 +153,91 @@ class TestResultFields:
|
|
|
147
153
|
assert hasattr(result, "patch_sims")
|
|
148
154
|
|
|
149
155
|
|
|
156
|
+
# ── __repr__ / __str__ ──
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class TestReprStr:
|
|
160
|
+
"""Test readable string representations."""
|
|
161
|
+
|
|
162
|
+
def test_audio_signal_repr(self):
|
|
163
|
+
sig = AudioSignal(np.zeros(16000), 16000)
|
|
164
|
+
r = repr(sig)
|
|
165
|
+
assert "AudioSignal" in r
|
|
166
|
+
assert "16000" in r
|
|
167
|
+
assert "1.000" in r
|
|
168
|
+
|
|
169
|
+
def test_audio_signal_str(self):
|
|
170
|
+
sig = AudioSignal(np.zeros(48000), 48000)
|
|
171
|
+
s = str(sig)
|
|
172
|
+
assert "1.000s" in s
|
|
173
|
+
assert "48000" in s
|
|
174
|
+
|
|
175
|
+
def test_similarity_result_str(self):
|
|
176
|
+
res = SimilarityResult(moslqo=4.5, vnsim=0.95)
|
|
177
|
+
s = str(res)
|
|
178
|
+
assert "4.5" in s
|
|
179
|
+
assert "0.95" in s
|
|
180
|
+
|
|
181
|
+
def test_similarity_result_repr(self):
|
|
182
|
+
res = SimilarityResult(moslqo=4.5, vnsim=0.95)
|
|
183
|
+
r = repr(res)
|
|
184
|
+
assert "SimilarityResult" in r
|
|
185
|
+
assert "moslqo" in r
|
|
186
|
+
|
|
187
|
+
def test_patch_similarity_result_str(self):
|
|
188
|
+
p = PatchSimilarityResult(similarity=0.85)
|
|
189
|
+
s = str(p)
|
|
190
|
+
assert "0.85" in s
|
|
191
|
+
|
|
192
|
+
def test_patch_similarity_result_repr(self):
|
|
193
|
+
p = PatchSimilarityResult(similarity=0.85)
|
|
194
|
+
r = repr(p)
|
|
195
|
+
assert "PatchSimilarityResult" in r
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# ── measure_batch ──
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class TestMeasureBatch:
|
|
202
|
+
"""Test batch evaluation API."""
|
|
203
|
+
|
|
204
|
+
def test_batch_before_create_raises(self):
|
|
205
|
+
api = VisqolApi()
|
|
206
|
+
with pytest.raises(RuntimeError, match="create"):
|
|
207
|
+
api.measure_batch([("/a.wav", "/b.wav")])
|
|
208
|
+
|
|
209
|
+
def test_batch_nonexistent_files_returns_exceptions(self):
|
|
210
|
+
api = VisqolApi()
|
|
211
|
+
api.create(mode="speech")
|
|
212
|
+
results = api.measure_batch([
|
|
213
|
+
("/nonexistent/a.wav", "/nonexistent/b.wav"),
|
|
214
|
+
("/nonexistent/c.wav", "/nonexistent/d.wav"),
|
|
215
|
+
])
|
|
216
|
+
assert len(results) == 2
|
|
217
|
+
assert all(isinstance(r, Exception) for r in results)
|
|
218
|
+
|
|
219
|
+
def test_batch_progress_callback(self):
|
|
220
|
+
api = VisqolApi()
|
|
221
|
+
api.create(mode="speech")
|
|
222
|
+
progress_log: list[tuple[int, int]] = []
|
|
223
|
+
|
|
224
|
+
def cb(done: int, total: int) -> None:
|
|
225
|
+
progress_log.append((done, total))
|
|
226
|
+
|
|
227
|
+
results = api.measure_batch(
|
|
228
|
+
[("/nonexistent/a.wav", "/nonexistent/b.wav")],
|
|
229
|
+
progress_callback=cb,
|
|
230
|
+
)
|
|
231
|
+
assert len(results) == 1
|
|
232
|
+
assert progress_log == [(1, 1)]
|
|
233
|
+
|
|
234
|
+
def test_batch_empty(self):
|
|
235
|
+
api = VisqolApi()
|
|
236
|
+
api.create(mode="speech")
|
|
237
|
+
results = api.measure_batch([])
|
|
238
|
+
assert results == []
|
|
239
|
+
|
|
240
|
+
|
|
150
241
|
# ── Package metadata ──
|
|
151
242
|
|
|
152
243
|
|
|
@@ -166,3 +257,5 @@ class TestVersion:
|
|
|
166
257
|
assert hasattr(visqol, "VisqolApi")
|
|
167
258
|
assert hasattr(visqol, "SimilarityResult")
|
|
168
259
|
assert hasattr(visqol, "AudioSignal")
|
|
260
|
+
assert hasattr(visqol, "PatchSimilarityResult")
|
|
261
|
+
assert hasattr(visqol, "ProgressCallback")
|
|
@@ -13,10 +13,17 @@ Usage:
|
|
|
13
13
|
print(f"MOS-LQO: {result.moslqo}")
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
__version__: str = "3.3.
|
|
16
|
+
__version__: str = "3.3.6"
|
|
17
17
|
|
|
18
|
-
from visqol.api import VisqolApi
|
|
19
|
-
from visqol.visqol_core import SimilarityResult
|
|
18
|
+
from visqol.api import ProgressCallback, VisqolApi
|
|
20
19
|
from visqol.audio_utils import AudioSignal
|
|
20
|
+
from visqol.nsim import PatchSimilarityResult
|
|
21
|
+
from visqol.visqol_core import SimilarityResult
|
|
21
22
|
|
|
22
|
-
__all__: list[str] = [
|
|
23
|
+
__all__: list[str] = [
|
|
24
|
+
"VisqolApi",
|
|
25
|
+
"SimilarityResult",
|
|
26
|
+
"AudioSignal",
|
|
27
|
+
"PatchSimilarityResult",
|
|
28
|
+
"ProgressCallback",
|
|
29
|
+
]
|
|
@@ -9,11 +9,13 @@ Usage::
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
11
|
import argparse
|
|
12
|
-
import sys
|
|
13
12
|
import logging
|
|
13
|
+
import sys
|
|
14
14
|
|
|
15
15
|
from visqol.api import VisqolApi
|
|
16
16
|
|
|
17
|
+
logger = logging.getLogger("visqol")
|
|
18
|
+
|
|
17
19
|
|
|
18
20
|
def main() -> None:
|
|
19
21
|
"""Entry point for the ``visqol`` CLI."""
|
|
@@ -61,7 +63,11 @@ def main() -> None:
|
|
|
61
63
|
|
|
62
64
|
# Setup logging
|
|
63
65
|
level = logging.DEBUG if args.verbose else logging.WARNING
|
|
64
|
-
logging.basicConfig(
|
|
66
|
+
logging.basicConfig(
|
|
67
|
+
level=level,
|
|
68
|
+
format="%(levelname)s: %(message)s",
|
|
69
|
+
stream=sys.stderr,
|
|
70
|
+
)
|
|
65
71
|
|
|
66
72
|
# Run ViSQOL
|
|
67
73
|
mode: str = "speech" if args.speech_mode else "audio"
|
|
@@ -79,30 +85,29 @@ def main() -> None:
|
|
|
79
85
|
|
|
80
86
|
result = api.measure(args.reference, args.degraded)
|
|
81
87
|
|
|
82
|
-
except FileNotFoundError as exc:
|
|
83
|
-
|
|
84
|
-
sys.exit(1)
|
|
85
|
-
except ValueError as exc:
|
|
86
|
-
print(f"Error: {exc}", file=sys.stderr)
|
|
87
|
-
sys.exit(1)
|
|
88
|
-
except RuntimeError as exc:
|
|
89
|
-
print(f"Error: {exc}", file=sys.stderr)
|
|
88
|
+
except (FileNotFoundError, ValueError, RuntimeError) as exc:
|
|
89
|
+
logger.error("%s", exc)
|
|
90
90
|
sys.exit(1)
|
|
91
|
+
except Exception as exc:
|
|
92
|
+
logger.error("Unexpected error: %s", exc)
|
|
93
|
+
sys.exit(2)
|
|
91
94
|
|
|
92
95
|
# Output results
|
|
93
96
|
print(f"MOS-LQO: {result.moslqo:.6f}")
|
|
94
97
|
print(f"VNSIM: {result.vnsim:.6f}")
|
|
98
|
+
|
|
95
99
|
if args.verbose:
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
100
|
+
logger.info("FVNSIM: %s", result.fvnsim)
|
|
101
|
+
logger.info("FVNSIM10: %s", result.fvnsim10)
|
|
102
|
+
logger.info("FSTDNSIM: %s", result.fstdnsim)
|
|
103
|
+
logger.info("FVDEGENERGY: %s", result.fvdegenergy)
|
|
104
|
+
logger.info("Patches: %d", len(result.patch_sims))
|
|
101
105
|
for i, p in enumerate(result.patch_sims):
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
+
logger.info(
|
|
107
|
+
" Patch %d: sim=%.4f ref=[%.3f-%.3f] deg=[%.3f-%.3f]",
|
|
108
|
+
i, p.similarity,
|
|
109
|
+
p.ref_patch_start_time, p.ref_patch_end_time,
|
|
110
|
+
p.deg_patch_start_time, p.deg_patch_end_time,
|
|
106
111
|
)
|
|
107
112
|
|
|
108
113
|
|
|
@@ -8,8 +8,10 @@ Corresponds to C++ file: visqol_api.cc
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
+
import logging
|
|
11
12
|
import os
|
|
12
|
-
from
|
|
13
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
14
|
+
from typing import Callable, List, Optional, Sequence, Tuple, Union
|
|
13
15
|
|
|
14
16
|
import numpy as np
|
|
15
17
|
from numpy.typing import NDArray
|
|
@@ -18,6 +20,8 @@ from visqol.audio_utils import AudioSignal
|
|
|
18
20
|
from visqol.visqol_manager import VisqolManager
|
|
19
21
|
from visqol.visqol_core import SimilarityResult
|
|
20
22
|
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
21
25
|
# Valid mode names
|
|
22
26
|
_VALID_MODES = frozenset({"audio", "speech"})
|
|
23
27
|
|
|
@@ -25,6 +29,9 @@ _VALID_MODES = frozenset({"audio", "speech"})
|
|
|
25
29
|
_DEFAULT_MODEL_DIR: str = os.path.join(os.path.dirname(__file__), "model")
|
|
26
30
|
_DEFAULT_SVR_MODEL: str = os.path.join(_DEFAULT_MODEL_DIR, "libsvm_nu_svr_model.txt")
|
|
27
31
|
|
|
32
|
+
# Type alias for progress callback: (completed_count, total_count) -> None
|
|
33
|
+
ProgressCallback = Callable[[int, int], None]
|
|
34
|
+
|
|
28
35
|
|
|
29
36
|
class VisqolApi:
|
|
30
37
|
"""
|
|
@@ -41,6 +48,7 @@ class VisqolApi:
|
|
|
41
48
|
def __init__(self) -> None:
|
|
42
49
|
self._manager: VisqolManager = VisqolManager()
|
|
43
50
|
self._is_created: bool = False
|
|
51
|
+
self._create_kwargs: dict[str, object] = {}
|
|
44
52
|
|
|
45
53
|
def create(
|
|
46
54
|
self,
|
|
@@ -90,14 +98,17 @@ class VisqolApi:
|
|
|
90
98
|
f"SVR model file not found: {model_path}"
|
|
91
99
|
)
|
|
92
100
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
+
# Store kwargs for batch mode (subprocess recreation)
|
|
102
|
+
self._create_kwargs = {
|
|
103
|
+
"model_path": model_path or "",
|
|
104
|
+
"use_speech_mode": use_speech_mode,
|
|
105
|
+
"use_unscaled_speech": use_unscaled_speech,
|
|
106
|
+
"search_window": search_window,
|
|
107
|
+
"disable_global_alignment": disable_global_alignment,
|
|
108
|
+
"disable_realignment": disable_realignment,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
self._manager.init(**self._create_kwargs) # type: ignore[arg-type]
|
|
101
112
|
self._is_created = True
|
|
102
113
|
|
|
103
114
|
def measure(self, ref_path: str, deg_path: str) -> SimilarityResult:
|
|
@@ -169,6 +180,45 @@ class VisqolApi:
|
|
|
169
180
|
deg_signal = AudioSignal(deg_array, sample_rate)
|
|
170
181
|
return self._manager.run_from_signals(ref_signal, deg_signal)
|
|
171
182
|
|
|
183
|
+
def measure_batch(
|
|
184
|
+
self,
|
|
185
|
+
file_pairs: Sequence[Tuple[str, str]],
|
|
186
|
+
*,
|
|
187
|
+
progress_callback: Optional[ProgressCallback] = None,
|
|
188
|
+
) -> List[Union[SimilarityResult, Exception]]:
|
|
189
|
+
"""
|
|
190
|
+
Evaluate multiple file pairs sequentially.
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
file_pairs: Sequence of ``(ref_path, deg_path)`` tuples.
|
|
194
|
+
progress_callback: Optional ``(completed, total) -> None`` callback
|
|
195
|
+
invoked after each pair is processed.
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
List of :class:`SimilarityResult` (on success) or :class:`Exception`
|
|
199
|
+
(on failure) for each pair, in the same order as *file_pairs*.
|
|
200
|
+
|
|
201
|
+
Raises:
|
|
202
|
+
RuntimeError: If :meth:`create` has not been called.
|
|
203
|
+
"""
|
|
204
|
+
self._ensure_created()
|
|
205
|
+
total = len(file_pairs)
|
|
206
|
+
results: List[Union[SimilarityResult, Exception]] = []
|
|
207
|
+
|
|
208
|
+
for idx, (ref_path, deg_path) in enumerate(file_pairs):
|
|
209
|
+
try:
|
|
210
|
+
result = self.measure(ref_path, deg_path)
|
|
211
|
+
results.append(result)
|
|
212
|
+
except Exception as exc:
|
|
213
|
+
results.append(exc)
|
|
214
|
+
logger.warning(
|
|
215
|
+
"Pair %d/%d failed: %s", idx + 1, total, exc,
|
|
216
|
+
)
|
|
217
|
+
if progress_callback is not None:
|
|
218
|
+
progress_callback(idx + 1, total)
|
|
219
|
+
|
|
220
|
+
return results
|
|
221
|
+
|
|
172
222
|
# ------------------------------------------------------------------
|
|
173
223
|
# Private helpers
|
|
174
224
|
# ------------------------------------------------------------------
|
|
@@ -53,6 +53,16 @@ class AudioSignal:
|
|
|
53
53
|
def __len__(self) -> int:
|
|
54
54
|
return len(self.data)
|
|
55
55
|
|
|
56
|
+
def __repr__(self) -> str:
|
|
57
|
+
return (
|
|
58
|
+
f"AudioSignal(samples={self.num_samples}, "
|
|
59
|
+
f"sample_rate={self.sample_rate}, "
|
|
60
|
+
f"duration={self.duration:.3f}s)"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
def __str__(self) -> str:
|
|
64
|
+
return f"AudioSignal({self.duration:.3f}s @ {self.sample_rate} Hz)"
|
|
65
|
+
|
|
56
66
|
|
|
57
67
|
def load_audio(path: str) -> Tuple[NDArray[np.float64], int]:
|
|
58
68
|
"""
|
|
@@ -174,21 +174,6 @@ def make_erb_filters(
|
|
|
174
174
|
return ErbFiltersResult(center_freqs=cf, filter_coeffs=filter_coeffs)
|
|
175
175
|
|
|
176
176
|
|
|
177
|
-
def _iir_filter(
|
|
178
|
-
b: NDArray[np.float64],
|
|
179
|
-
a: NDArray[np.float64],
|
|
180
|
-
signal: NDArray[np.float64],
|
|
181
|
-
zi: NDArray[np.float64],
|
|
182
|
-
) -> Tuple[NDArray[np.float64], NDArray[np.float64]]:
|
|
183
|
-
"""
|
|
184
|
-
Apply IIR filter (Direct Form II transposed).
|
|
185
|
-
|
|
186
|
-
Matches C++ ``SignalFilter::Filter``.
|
|
187
|
-
"""
|
|
188
|
-
y, zf = lfilter(b, a, signal, zi=zi)
|
|
189
|
-
return y, zf
|
|
190
|
-
|
|
191
|
-
|
|
192
177
|
class GammatoneFilterBank:
|
|
193
178
|
"""
|
|
194
179
|
Gammatone filterbank that applies 4-stage cascaded IIR filtering.
|
|
@@ -226,9 +211,9 @@ class GammatoneFilterBank:
|
|
|
226
211
|
"""
|
|
227
212
|
assert self._conditions is not None, "Call reset_conditions() first"
|
|
228
213
|
|
|
229
|
-
|
|
214
|
+
nb = self.num_bands
|
|
230
215
|
|
|
231
|
-
# Extract coefficient vectors
|
|
216
|
+
# Extract coefficient vectors (all bands at once)
|
|
232
217
|
A0 = filter_coeffs[0]
|
|
233
218
|
A11 = filter_coeffs[1]
|
|
234
219
|
A12 = filter_coeffs[2]
|
|
@@ -240,36 +225,28 @@ class GammatoneFilterBank:
|
|
|
240
225
|
B2 = filter_coeffs[8]
|
|
241
226
|
gain = filter_coeffs[9]
|
|
242
227
|
|
|
243
|
-
for
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
self._conditions[1][chan] = zf
|
|
266
|
-
|
|
267
|
-
y, zf = lfilter(a3_b, denom, y, zi=self._conditions[2][chan])
|
|
268
|
-
self._conditions[2][chan] = zf
|
|
269
|
-
|
|
270
|
-
y, zf = lfilter(a4_b, denom, y, zi=self._conditions[3][chan])
|
|
271
|
-
self._conditions[3][chan] = zf
|
|
272
|
-
|
|
228
|
+
# Pre-build numerator arrays for all 4 stages × all channels
|
|
229
|
+
# Stage 1: normalize by gain
|
|
230
|
+
b1 = np.column_stack([A0 / gain, A11 / gain, A2 / gain]) # (nb, 3)
|
|
231
|
+
# Stage 2-4
|
|
232
|
+
b2 = np.column_stack([A0, A12, A2]) # (nb, 3)
|
|
233
|
+
b3 = np.column_stack([A0, A13, A2]) # (nb, 3)
|
|
234
|
+
b4 = np.column_stack([A0, A14, A2]) # (nb, 3)
|
|
235
|
+
|
|
236
|
+
# Denominator is the same for all 4 stages (per channel)
|
|
237
|
+
denom = np.column_stack([B0, B1, B2]) # (nb, 3)
|
|
238
|
+
|
|
239
|
+
# Filter each channel in turn through the four cascaded lfilter stages, carrying the per-stage filter state
|
|
240
|
+
output = np.empty((nb, len(signal)), dtype=np.float64)
|
|
241
|
+
stages_b = [b1, b2, b3, b4]
|
|
242
|
+
|
|
243
|
+
for chan in range(nb):
|
|
244
|
+
y = signal
|
|
245
|
+
for stage_idx, sb in enumerate(stages_b):
|
|
246
|
+
y, zf = lfilter(
|
|
247
|
+
sb[chan], denom[chan], y, zi=self._conditions[stage_idx][chan],
|
|
248
|
+
)
|
|
249
|
+
self._conditions[stage_idx][chan] = zf
|
|
273
250
|
output[chan] = y
|
|
274
251
|
|
|
275
252
|
return output
|
|
@@ -303,6 +280,11 @@ class Spectrogram:
|
|
|
303
280
|
def num_frames(self) -> int:
|
|
304
281
|
return self.data.shape[1]
|
|
305
282
|
|
|
283
|
+
def __repr__(self) -> str:
|
|
284
|
+
return (
|
|
285
|
+
f"Spectrogram(bands={self.num_bands}, frames={self.num_frames})"
|
|
286
|
+
)
|
|
287
|
+
|
|
306
288
|
|
|
307
289
|
def convert_to_db(matrix: NDArray[np.float64]) -> NDArray[np.float64]:
|
|
308
290
|
"""
|
|
@@ -343,16 +325,16 @@ def prepare_spectrograms_for_comparison(
|
|
|
343
325
|
ref_db = np.maximum(ref_db, NOISE_FLOOR_ABSOLUTE_DB)
|
|
344
326
|
deg_db = np.maximum(deg_db, NOISE_FLOOR_ABSOLUTE_DB)
|
|
345
327
|
|
|
346
|
-
# 3. Per-frame relative noise floor
|
|
328
|
+
# 3. Per-frame relative noise floor (vectorised)
|
|
347
329
|
min_cols = min(ref_db.shape[1], deg_db.shape[1])
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
330
|
+
ref_view = ref_db[:, :min_cols]
|
|
331
|
+
deg_view = deg_db[:, :min_cols]
|
|
332
|
+
ref_max = np.max(ref_view, axis=0) # (min_cols,)
|
|
333
|
+
deg_max = np.max(deg_view, axis=0) # (min_cols,)
|
|
334
|
+
any_max = np.maximum(ref_max, deg_max)
|
|
335
|
+
floor_db = any_max - NOISE_FLOOR_RELATIVE_TO_PEAK_DB # (min_cols,)
|
|
336
|
+
ref_db[:, :min_cols] = np.maximum(ref_view, floor_db[np.newaxis, :])
|
|
337
|
+
deg_db[:, :min_cols] = np.maximum(deg_view, floor_db[np.newaxis, :])
|
|
356
338
|
|
|
357
339
|
# 4. Global normalization: subtract global minimum
|
|
358
340
|
lowest = min(float(np.min(ref_db)), float(np.min(deg_db)))
|
|
@@ -51,6 +51,19 @@ class PatchSimilarityResult:
|
|
|
51
51
|
deg_patch_start_time: float = 0.0
|
|
52
52
|
deg_patch_end_time: float = 0.0
|
|
53
53
|
|
|
54
|
+
def __str__(self) -> str:
|
|
55
|
+
return (
|
|
56
|
+
f"PatchSimilarityResult(sim={self.similarity:.4f}, "
|
|
57
|
+
f"ref=[{self.ref_patch_start_time:.3f}-{self.ref_patch_end_time:.3f}], "
|
|
58
|
+
f"deg=[{self.deg_patch_start_time:.3f}-{self.deg_patch_end_time:.3f}])"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def __repr__(self) -> str:
|
|
62
|
+
return (
|
|
63
|
+
f"PatchSimilarityResult(similarity={self.similarity!r}, "
|
|
64
|
+
f"freq_band_means=<{len(self.freq_band_means)} bands>)"
|
|
65
|
+
)
|
|
66
|
+
|
|
54
67
|
|
|
55
68
|
def _valid_2d_conv_with_boundary(
|
|
56
69
|
kernel: NDArray[np.float64], matrix: NDArray[np.float64]
|
|
@@ -43,6 +43,16 @@ class SimilarityResult:
|
|
|
43
43
|
)
|
|
44
44
|
patch_sims: List[PatchSimilarityResult] = field(default_factory=list)
|
|
45
45
|
|
|
46
|
+
def __str__(self) -> str:
|
|
47
|
+
return f"SimilarityResult(moslqo={self.moslqo:.4f}, vnsim={self.vnsim:.4f})"
|
|
48
|
+
|
|
49
|
+
def __repr__(self) -> str:
|
|
50
|
+
return (
|
|
51
|
+
f"SimilarityResult(moslqo={self.moslqo!r}, vnsim={self.vnsim!r}, "
|
|
52
|
+
f"fvnsim=<{len(self.fvnsim)} bands>, "
|
|
53
|
+
f"patch_sims=<{len(self.patch_sims)} patches>)"
|
|
54
|
+
)
|
|
55
|
+
|
|
46
56
|
|
|
47
57
|
# ---------------------------------------------------------------------------
|
|
48
58
|
# Helper functions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: visqol-python
|
|
3
|
-
Version: 3.3.5
|
|
3
|
+
Version: 3.3.6
|
|
4
4
|
Summary: ViSQOL - Virtual Speech Quality Objective Listener (Pure Python)
|
|
5
5
|
Author: Shan Jiang
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -30,6 +30,10 @@ Requires-Dist: soundfile>=0.10
|
|
|
30
30
|
Requires-Dist: libsvm-official>=3.25
|
|
31
31
|
Provides-Extra: test
|
|
32
32
|
Requires-Dist: pytest>=7.0; extra == "test"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
36
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
33
37
|
Dynamic: license-file
|
|
34
38
|
|
|
35
39
|
# ViSQOL (Python)
|
|
@@ -103,6 +107,33 @@ result = api.measure_from_arrays(ref, deg, sample_rate=sr)
|
|
|
103
107
|
print(f"MOS-LQO: {result.moslqo:.4f}")
|
|
104
108
|
```
|
|
105
109
|
|
|
110
|
+
### Batch Evaluation
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from visqol import VisqolApi
|
|
114
|
+
|
|
115
|
+
api = VisqolApi()
|
|
116
|
+
api.create(mode="audio")
|
|
117
|
+
|
|
118
|
+
file_pairs = [
|
|
119
|
+
("ref1.wav", "deg1.wav"),
|
|
120
|
+
("ref2.wav", "deg2.wav"),
|
|
121
|
+
("ref3.wav", "deg3.wav"),
|
|
122
|
+
]
|
|
123
|
+
|
|
124
|
+
# Optional progress callback
|
|
125
|
+
results = api.measure_batch(
|
|
126
|
+
file_pairs,
|
|
127
|
+
progress_callback=lambda done, total: print(f"{done}/{total}"),
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
for pair, result in zip(file_pairs, results):
|
|
131
|
+
if isinstance(result, Exception):
|
|
132
|
+
print(f"{pair}: FAILED — {result}")
|
|
133
|
+
else:
|
|
134
|
+
print(f"{pair}: MOS-LQO = {result.moslqo:.4f}")
|
|
135
|
+
```
|
|
136
|
+
|
|
106
137
|
### Command Line
|
|
107
138
|
|
|
108
139
|
```bash
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|