visqol-python 3.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- visqol/__init__.py +20 -0
- visqol/__main__.py +92 -0
- visqol/alignment.py +82 -0
- visqol/analysis_window.py +52 -0
- visqol/api.py +110 -0
- visqol/audio_utils.py +90 -0
- visqol/gammatone.py +418 -0
- visqol/model/libsvm_nu_svr_model.txt +324 -0
- visqol/nsim.py +134 -0
- visqol/patch_creator.py +222 -0
- visqol/patch_selector.py +357 -0
- visqol/quality_mapper.py +114 -0
- visqol/signal_utils.py +83 -0
- visqol/visqol_core.py +240 -0
- visqol/visqol_manager.py +194 -0
- visqol_python-3.3.3.dist-info/METADATA +223 -0
- visqol_python-3.3.3.dist-info/RECORD +21 -0
- visqol_python-3.3.3.dist-info/WHEEL +5 -0
- visqol_python-3.3.3.dist-info/entry_points.txt +2 -0
- visqol_python-3.3.3.dist-info/licenses/LICENSE +201 -0
- visqol_python-3.3.3.dist-info/top_level.txt +1 -0
visqol/patch_selector.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamic programming patch matching and fine alignment.
|
|
3
|
+
|
|
4
|
+
Corresponds to C++ file: comparison_patches_selector.cc (384 lines)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import math
|
|
9
|
+
import numpy as np
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
from visqol.audio_utils import AudioSignal
|
|
13
|
+
from visqol.analysis_window import AnalysisWindow
|
|
14
|
+
from visqol.nsim import PatchSimilarityResult, measure_patch_similarity
|
|
15
|
+
from visqol.gammatone import (
|
|
16
|
+
GammatoneSpectrogramBuilder, Spectrogram,
|
|
17
|
+
prepare_spectrograms_for_comparison,
|
|
18
|
+
)
|
|
19
|
+
from visqol.alignment import align_and_truncate
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def build_degraded_patch(spectrogram_data: np.ndarray,
                         window_beginning: int, window_end: int,
                         window_height: int, window_width: int) -> np.ndarray:
    """
    Build a degraded patch from spectrogram data with zero-padding for out-of-bounds.

    Matches C++ ComparisonPatchesSelector::BuildDegradedPatch.

    Patch column ``j`` holds spectrogram frame ``window_beginning + j`` when
    that frame lies inside both the spectrogram and the requested window;
    every other cell is zero.

    Args:
        spectrogram_data: (num_bands, num_frames) degraded spectrogram.
        window_beginning: Start frame index (can be negative).
        window_end: End frame index (inclusive).
        window_height: Number of frequency bands.
        window_width: Number of frames per patch.

    Returns:
        (window_height, window_width) patch matrix.

    Raises:
        IndexError: If the spectrogram has more bands than ``window_height``.
    """
    num_rows = spectrogram_data.shape[0]
    num_cols = spectrogram_data.shape[1]
    deg_patch = np.zeros((window_height, window_width))

    if num_rows > window_height:
        raise IndexError(
            f"spectrogram has {num_rows} bands but the patch only has "
            f"{window_height} rows"
        )

    # Range of real spectrogram frames that land inside the patch. The upper
    # bound is the tightest of: requested window end, last available frame,
    # and last column the patch can hold.
    src_start = max(0, window_beginning)
    src_stop = min(window_end, num_cols - 1,
                   window_beginning + window_width - 1) + 1

    if src_stop > src_start:
        # A negative window_beginning shifts the data right, leaving leading
        # zero columns; anything past src_stop stays zero as trailing padding.
        dst_start = src_start - window_beginning
        dst_stop = dst_start + (src_stop - src_start)
        deg_patch[:num_rows, dst_start:dst_stop] = (
            spectrogram_data[:, src_start:src_stop]
        )

    return deg_patch
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _calc_max_num_patches(ref_patch_indices: List[int],
|
|
75
|
+
num_frames_in_deg: int,
|
|
76
|
+
num_frames_per_patch: int) -> int:
|
|
77
|
+
"""
|
|
78
|
+
Calculate max number of patches that fit within degraded spectrogram.
|
|
79
|
+
Matches C++ ComparisonPatchesSelector::CalcMaxNumPatches.
|
|
80
|
+
"""
|
|
81
|
+
num_patches = len(ref_patch_indices)
|
|
82
|
+
if num_patches > 0:
|
|
83
|
+
while (num_patches > 0 and
|
|
84
|
+
ref_patch_indices[num_patches - 1] -
|
|
85
|
+
math.floor(num_frames_per_patch / 2) > num_frames_in_deg):
|
|
86
|
+
num_patches -= 1
|
|
87
|
+
return num_patches
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def find_most_optimal_deg_patches(
    ref_patches: List[np.ndarray],
    ref_patch_indices: List[int],
    spectrogram_data: np.ndarray,
    frame_duration: float,
    search_window_radius: int = 60,
) -> List[PatchSimilarityResult]:
    """
    Find the most optimal degraded patches using dynamic programming.

    Matches C++ ComparisonPatchesSelector::FindMostOptimalDegPatches.

    For every reference patch, each degraded frame offset inside the search
    window is scored with ``measure_patch_similarity``. A cumulative score
    table and a backtrace table are filled patch by patch, and the best
    path is then recovered by walking the backtrace from the last patch.

    Args:
        ref_patches: List of reference patch matrices.
        ref_patch_indices: Start frame indices for reference patches.
        spectrogram_data: Full degraded spectrogram.
        frame_duration: Duration of one frame in seconds.
        search_window_radius: Search window radius in patch units.

    Returns:
        List of PatchSimilarityResult for each matched pair.

    Raises:
        ValueError: If no reference patch fits the degraded spectrogram.
    """
    # Patch geometry is taken from the first reference patch.
    num_frames_per_patch = ref_patches[0].shape[1]
    num_bands = ref_patches[0].shape[0]
    num_frames_in_deg = spectrogram_data.shape[1]
    patch_duration = frame_duration * num_frames_per_patch
    # The radius is given in patches; convert it to frames.
    search_window = search_window_radius * num_frames_per_patch

    num_patches = _calc_max_num_patches(
        ref_patch_indices, num_frames_in_deg, num_frames_per_patch
    )

    if num_patches == 0:
        raise ValueError(
            "Degraded file was too short, different, or misaligned to score "
            "any of the reference patches."
        )

    if num_patches < len(ref_patch_indices):
        logger.warning(
            "Dropping %d (of %d) reference patches due to degraded file "
            "being misaligned or too short.",
            len(ref_patch_indices) - num_patches, len(ref_patch_indices)
        )

    # Pre-build all degraded patches
    # One patch per degraded frame offset, so the forward pass can look them
    # up by offset instead of rebuilding them for every reference patch.
    deg_patches = []
    for offset in range(num_frames_in_deg):
        patch = build_degraded_patch(
            spectrogram_data, offset,
            offset + num_frames_per_patch - 1,
            num_bands, num_frames_per_patch
        )
        deg_patches.append(patch)

    # DP tables
    # cumulative_dp[p, o]: best accumulated score with patch p at offset o.
    # backtrace[p, o]: offset chosen for patch p - 1 on that path. Cells the
    # forward pass never visits keep their initial 0.0 / -1 values.
    cumulative_dp = np.full((len(ref_patch_indices), num_frames_in_deg),
                            0.0, dtype=np.float64)
    backtrace = np.full((len(ref_patch_indices), num_frames_in_deg),
                        -1, dtype=np.int64)

    # Forward pass
    for patch_index in range(num_patches):
        ref_frame_index = ref_patch_indices[patch_index]

        # Offsets considered for this patch, clamped to the spectrogram.
        low = max(0, ref_frame_index - search_window)
        high = min(num_frames_in_deg - 1, ref_frame_index + search_window)

        for slide_offset in range(low, high + 1):
            # Defensive guard: high is already capped at num_frames_in_deg - 1.
            if slide_offset >= num_frames_in_deg:
                break

            deg_patch = deg_patches[slide_offset]
            sim_result = measure_patch_similarity(
                ref_patches[patch_index], deg_patch
            )
            sim_val = sim_result.similarity

            past_slide_offset = -1
            highest_sim = -np.inf

            if patch_index > 0:
                lower_limit = max(0,
                                  ref_patch_indices[patch_index - 1] - search_window)

                # Search backwards for best previous cumulative score
                # The strict '>' combined with the descending scan means that
                # among equal scores the offset closest to slide_offset wins.
                back_offset = slide_offset - 1
                while back_offset >= lower_limit:
                    if cumulative_dp[patch_index - 1, back_offset] > highest_sim:
                        highest_sim = cumulative_dp[patch_index - 1, back_offset]
                        past_slide_offset = back_offset
                    back_offset -= 1

                sim_val += highest_sim

                # Packet loss handling: check if skipping current is better
                # Skipping is recorded as a backtrace entry equal to the
                # current offset, which the backtrace loop below detects.
                if cumulative_dp[patch_index - 1, slide_offset] > sim_val:
                    sim_val = cumulative_dp[patch_index - 1, slide_offset]
                    past_slide_offset = slide_offset

            cumulative_dp[patch_index, slide_offset] = sim_val
            backtrace[patch_index, slide_offset] = past_slide_offset

    # Find best ending offset
    last_index = num_patches - 1
    lower_limit = max(0, ref_patch_indices[last_index] - search_window)
    upper_limit = min(num_frames_in_deg - 1,
                      ref_patch_indices[last_index] + search_window)

    max_score = -np.inf
    last_offset = lower_limit
    for slide_offset in range(lower_limit, upper_limit + 1):
        # Defensive guard: upper_limit is already capped at num_frames_in_deg - 1.
        if slide_offset >= num_frames_in_deg:
            break
        if cumulative_dp[last_index, slide_offset] > max_score:
            max_score = cumulative_dp[last_index, slide_offset]
            last_offset = slide_offset

    # Backtrace to find best path
    best_deg_patches = [PatchSimilarityResult() for _ in range(num_patches)]

    for patch_index in range(num_patches - 1, -1, -1):
        ref_patch = ref_patches[patch_index]
        deg_patch = build_degraded_patch(
            spectrogram_data, last_offset,
            last_offset + num_frames_per_patch - 1,
            num_bands, num_frames_per_patch
        )

        sim_result = measure_patch_similarity(ref_patch, deg_patch)

        # Check if this was a packet loss (no match found)
        if last_offset == backtrace[patch_index, last_offset]:
            # Zeroed degraded times mark the patch as unmatched.
            sim_result.deg_patch_start_time = 0.0
            sim_result.deg_patch_end_time = 0.0
            sim_result.similarity = 0.0
            sim_result.freq_band_means = np.zeros(num_bands)
        else:
            sim_result.deg_patch_start_time = last_offset * frame_duration
            sim_result.deg_patch_end_time = (
                sim_result.deg_patch_start_time + patch_duration
            )

        sim_result.ref_patch_start_time = (
            ref_patch_indices[patch_index] * frame_duration
        )
        sim_result.ref_patch_end_time = (
            sim_result.ref_patch_start_time + patch_duration
        )

        best_deg_patches[patch_index] = sim_result
        # Step to the offset recorded for the previous patch on this path.
        last_offset = backtrace[patch_index, last_offset]

    return best_deg_patches
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def slice_signal(signal: AudioSignal, start_time: float,
                 end_time: float) -> AudioSignal:
    """
    Slice an audio signal by time range.
    Matches C++ ComparisonPatchesSelector::Slice.

    Times are converted to sample indices with ``signal.sample_rate`` and
    clamped to the available data. Any part of the requested range that lies
    outside the signal is filled with zeros: before the slice when
    ``start_time`` is negative, after it when ``end_time`` runs past the
    last sample.

    Args:
        signal: Source signal providing ``data`` and ``sample_rate``.
        start_time: Slice start in seconds (may be negative).
        end_time: Slice end in seconds (may exceed the signal length).

    Returns:
        A new AudioSignal holding a copy of the requested range.
    """
    sample_rate = signal.sample_rate
    num_samples = len(signal.data)

    start_index = max(0, int(start_time * sample_rate))
    end_index = min(num_samples - 1, int(end_time * sample_rate))

    sliced = signal.data[start_index:end_index].copy()

    # Add silence at end if needed. The padding must follow the audio:
    # placing it in front would shift the slice instead of extending its tail.
    end_time_diff = end_time * sample_rate - num_samples
    if end_time_diff > 0:
        sliced = np.concatenate([sliced, np.zeros(int(end_time_diff))])

    # Add silence at beginning if needed
    if start_time < 0:
        pre_silence = np.zeros(int(-start_time * sample_rate))
        sliced = np.concatenate([pre_silence, sliced])

    return AudioSignal(sliced, sample_rate)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def finely_align_and_recreate_patches(
    sim_results: List[PatchSimilarityResult],
    ref_signal: AudioSignal,
    deg_signal: AudioSignal,
    spect_builder: GammatoneSpectrogramBuilder,
    window: AnalysisWindow,
) -> List[PatchSimilarityResult]:
    """
    Fine-align each matched patch pair in the time domain.

    Matches C++ ComparisonPatchesSelector::FinelyAlignAndRecreatePatches.

    For each matched pair:
    1. Extract audio sub-signals
    2. Re-align at fine granularity
    3. Rebuild spectrograms
    4. Recompute NSIM
    5. Keep the better result (original or re-aligned)

    This is best-effort: if alignment or spectrogram construction fails for
    a patch, or too few samples remain, the original coarse result is kept.
    Failures are logged at debug level.

    Args:
        sim_results: Coarse matches, one per reference patch.
        ref_signal: Full reference signal.
        deg_signal: Full degraded signal.
        spect_builder: Builder used to recompute the spectrograms.
        window: Analysis window passed to the builder; its ``size`` is the
            minimum number of samples a re-aligned segment must exceed.

    Returns:
        A new list with the same length and order as ``sim_results``.
    """
    # Start from the coarse results; entries are replaced only on improvement.
    realigned_results = list(sim_results)

    for i, sim_result in enumerate(sim_results):
        # Skip packet-loss patches (degraded start and end times both zero).
        if (sim_result.deg_patch_start_time == sim_result.deg_patch_end_time
                and sim_result.deg_patch_start_time == 0.0):
            continue

        # 1. Extract audio segments
        ref_audio = slice_signal(ref_signal,
                                 sim_result.ref_patch_start_time,
                                 sim_result.ref_patch_end_time)
        deg_audio = slice_signal(deg_signal,
                                 sim_result.deg_patch_start_time,
                                 sim_result.deg_patch_end_time)

        # 2. Fine alignment
        try:
            ref_aligned, deg_aligned, lag = align_and_truncate(
                ref_audio, deg_audio
            )
        except Exception:
            logger.debug(
                "Fine alignment failed for patch %d; keeping coarse match.",
                i, exc_info=True
            )
            continue

        # Check we have enough samples
        if (len(ref_aligned.data) <= window.size or
                len(deg_aligned.data) <= window.size):
            continue

        # 3. Rebuild spectrograms
        try:
            ref_spec = spect_builder.build(ref_aligned, window)
            deg_spec = spect_builder.build(deg_aligned, window)
        except Exception:
            logger.debug(
                "Spectrogram rebuild failed for patch %d; keeping coarse "
                "match.",
                i, exc_info=True
            )
            continue

        ref_db, deg_db = prepare_spectrograms_for_comparison(ref_spec, deg_spec)

        # 4. Recompute NSIM
        new_sim = measure_patch_similarity(ref_db, deg_db)

        # 5. Keep better result: the coarse match stays when it scores higher.
        if new_sim.similarity < sim_result.similarity:
            continue

        # NOTE(review): lag is added directly to the patch times, so it is
        # presumably in seconds — confirm against align_and_truncate.
        if lag > 0:
            new_sim.ref_patch_start_time = (
                sim_result.ref_patch_start_time + lag
            )
            new_sim.deg_patch_start_time = sim_result.deg_patch_start_time
        else:
            new_sim.ref_patch_start_time = sim_result.ref_patch_start_time
            new_sim.deg_patch_start_time = (
                sim_result.deg_patch_start_time - lag
            )

        new_sim.ref_patch_end_time = (
            new_sim.ref_patch_start_time + ref_aligned.duration
        )
        new_sim.deg_patch_end_time = (
            new_sim.deg_patch_start_time + deg_aligned.duration
        )
        realigned_results[i] = new_sim

    return realigned_results
|
visqol/quality_mapper.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quality mappers: SVR (Audio mode) and Exponential (Speech mode).
|
|
3
|
+
|
|
4
|
+
Corresponds to C++ files:
|
|
5
|
+
- svr_similarity_to_quality_mapper.cc
|
|
6
|
+
- speech_similarity_to_quality_mapper.cc
|
|
7
|
+
- support_vector_regression_model.cc
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import logging
|
|
12
|
+
import numpy as np
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SimilarityToQualityMapper(ABC):
    """Interface shared by all similarity-to-quality mappers."""

    @abstractmethod
    def init(self) -> None:
        """Prepare the mapper for use (for example by loading a model)."""

    @abstractmethod
    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """Map NSIM feature vectors to a predicted MOS quality value."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SvrSimilarityToQualityMapper(SimilarityToQualityMapper):
    """
    SVR-based quality mapper for Audio mode.
    Uses libsvm to load and predict from pre-trained SVR model.
    """

    def __init__(self, model_path: str):
        # Path to the libsvm model file; the model is loaded by init().
        self.model_path = model_path
        self.model = None

    @staticmethod
    def _import_from_svmutil(name: str):
        """
        Return attribute ``name`` from libsvm's ``svmutil`` module.

        Tries the top-level ``svmutil`` module first, then
        ``libsvm.svmutil``.

        Raises:
            ImportError: If neither module provides ``name``.
        """
        import importlib

        for module_name in ("svmutil", "libsvm.svmutil"):
            try:
                return getattr(importlib.import_module(module_name), name)
            except (ImportError, AttributeError):
                continue
        raise ImportError(
            "libsvm is required for Audio mode SVR quality mapping. "
            "Install with: pip install libsvm-official"
        )

    def init(self):
        """
        Load the libsvm model file.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            ImportError: If libsvm is not installed.
        """
        if not os.path.exists(self.model_path):
            raise FileNotFoundError(
                f"SVR model file not found: {self.model_path}"
            )

        svm_load_model = self._import_from_svmutil("svm_load_model")
        self.model = svm_load_model(self.model_path)

    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """
        Predict MOS using SVR model.
        Only fvnsim is used as input features.

        Returns:
            The predicted value clipped to the range [1.0, 5.0].

        Raises:
            ImportError: If libsvm is not installed.
            RuntimeError: If no model is loaded (init() was not called or
                did not produce a model).
        """
        svm_predict = self._import_from_svmutil("svm_predict")

        if self.model is None:
            raise RuntimeError(
                "SVR model is not loaded; call init() before "
                "predict_quality()."
            )

        # Convert to libsvm format: {1: val1, 2: val2, ...}
        x = {index: float(value)
             for index, value in enumerate(fvnsim, start=1)}
        # svm_predict returns (predicted_labels, (MSE, SCC, ...), decision_values)
        predicted_labels, _, _ = svm_predict([0], [x], self.model, '-q')
        return float(np.clip(predicted_labels[0], 1.0, 5.0))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class SpeechSimilarityToQualityMapper(SimilarityToQualityMapper):
    """
    Speech-mode quality mapper based on an exponential fit.
    Uses hardcoded parameters fitted on TCD-VOIP dataset.
    """

    # Fitted parameters (from C++ speech_similarity_to_quality_mapper.cc)
    FIT_A = -262.847869
    FIT_B = 0.0154302525
    FIT_X0 = -361.063949
    FIT_SCALE = 1.245063

    def __init__(self, scale_to_max_mos: bool = True):
        # FIT_SCALE is applied only when scaling is requested; otherwise the
        # fitted value is used unscaled.
        if scale_to_max_mos:
            self.scale = self.FIT_SCALE
        else:
            self.scale = 1.0

    def init(self):
        """Nothing to load: the fit parameters are class constants."""
        return None

    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """
        Predict MOS using exponential fit: a + exp(b * (x - x0)).

        ``x`` is the mean of ``fvnsim``; the other feature vectors are not
        used. The fitted value is multiplied by ``self.scale`` and clipped
        to the range [1.0, 5.0].
        """
        mean_nsim = float(np.mean(fvnsim))
        exponent = self.FIT_B * (mean_nsim - self.FIT_X0)
        fitted = self.FIT_A + np.exp(exponent)
        return float(np.clip(fitted * self.scale, 1.0, 5.0))
|
visqol/signal_utils.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Signal processing utilities: envelope, cross-correlation, normalization.
|
|
3
|
+
|
|
4
|
+
Corresponds to C++ files: envelope.cc, xcorr.cc, misc_math.cc
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import signal as scipy_signal
|
|
9
|
+
from scipy.fft import fft, ifft
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def upper_envelope(sig: np.ndarray) -> np.ndarray:
    """
    Return the upper amplitude envelope of ``sig`` via the Hilbert transform.

    Matches C++ Envelope::CalcUpperEnv. The signal mean is removed before
    the analytic signal is computed, and added back to the magnitude of the
    analytic signal afterwards.
    """
    offset = np.mean(sig)
    analytic_signal = scipy_signal.hilbert(sig - offset)
    return np.abs(analytic_signal) + offset
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def find_best_lag(ref: np.ndarray, deg: np.ndarray) -> int:
    """
    Find the lag that maximizes cross-correlation between two signals.

    The correlation evaluated for lag ``k`` is ``sum_m ref[m + k] * deg[m]``.
    A positive result therefore means the content of ``ref`` occurs ``k``
    samples later than in ``deg`` (ref is delayed relative to deg); a
    negative result means ``deg`` is the delayed one. When several lags
    share the maximum, the smallest lag is returned.

    Matches C++ XCorr::FindLowestLagIndex which uses FFT-based cross-correlation.

    Args:
        ref: 1-D reference signal.
        deg: 1-D degraded signal; the shorter input is zero-padded.

    Returns:
        The best lag in samples, in ``[-(n - 1), n - 1]`` where ``n`` is the
        longer input length.

    Raises:
        ValueError: If both inputs are empty.
    """
    n = max(len(ref), len(deg))
    if n == 0:
        raise ValueError("find_best_lag requires at least one non-empty signal")
    max_lag = n - 1

    # Smallest power of two >= 2*n - 1, so the circular correlation from the
    # FFT does not wrap lags onto each other.
    fft_points = 1 << (2 * n - 2).bit_length()

    # Converting to float64 first keeps full precision for float32 input;
    # fft() zero-pads each signal up to fft_points.
    fft_ref = fft(np.asarray(ref, dtype=np.float64), n=fft_points)
    fft_deg = fft(np.asarray(deg, dtype=np.float64), n=fft_points)
    xcorr_full = np.real(ifft(fft_ref * np.conj(fft_deg)))

    # Order the correlations as lags -max_lag..-1 followed by 0..max_lag.
    # Negative lags sit at the end of the circular result. Slicing from an
    # explicit start index keeps the negative part empty when max_lag is 0.
    corrs = np.concatenate([
        xcorr_full[fft_points - max_lag:],
        xcorr_full[:max_lag + 1],
    ])

    return int(np.argmax(corrs)) - max_lag
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def normalize(mat: np.ndarray) -> np.ndarray:
    """
    Rescale a matrix/vector linearly so its values span [0, 1].
    Matches C++ MiscMath::Normalize.

    When every element has the same value there is no range to scale by,
    and an all-zero array of the same shape is returned.
    """
    lowest = np.min(mat)
    highest = np.max(mat)
    if highest == lowest:
        # Constant input: avoid dividing by zero.
        return np.zeros_like(mat)
    value_range = highest - lowest
    return (mat - lowest) / value_range
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def exponential_from_fit(x: float, a: float, b: float, x0: float) -> float:
    """
    Evaluate the fitted exponential curve a + exp(b * (x - x0)).
    Matches C++ MiscMath::ExponentialFromFit.
    """
    exponent = b * (x - x0)
    return a + np.exp(exponent)
|