visqol-python 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,357 @@
1
+ """
2
+ Dynamic programming patch matching and fine alignment.
3
+
4
+ Corresponds to C++ file: comparison_patches_selector.cc (384 lines)
5
+ """
6
+
7
+ import logging
8
+ import math
9
+ import numpy as np
10
+ from typing import List, Optional
11
+
12
+ from visqol.audio_utils import AudioSignal
13
+ from visqol.analysis_window import AnalysisWindow
14
+ from visqol.nsim import PatchSimilarityResult, measure_patch_similarity
15
+ from visqol.gammatone import (
16
+ GammatoneSpectrogramBuilder, Spectrogram,
17
+ prepare_spectrograms_for_comparison,
18
+ )
19
+ from visqol.alignment import align_and_truncate
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
def build_degraded_patch(spectrogram_data: np.ndarray,
                         window_beginning: int, window_end: int,
                         window_height: int, window_width: int) -> np.ndarray:
    """
    Build a degraded patch from spectrogram data with zero-padding for out-of-bounds.

    Matches C++ ComparisonPatchesSelector::BuildDegradedPatch.

    Frames of the requested window that fall before frame 0 or after the last
    frame of the spectrogram are filled with zeros. The real frames are copied
    with a single slice assignment instead of being assembled row by row.

    Args:
        spectrogram_data: (num_bands, num_frames) degraded spectrogram.
        window_beginning: Start frame index (can be negative).
        window_end: End frame index (inclusive).
        window_height: Number of frequency bands.
        window_width: Number of frames per patch.

    Returns:
        (window_height, window_width) patch matrix.

    Raises:
        IndexError: If the spectrogram has more bands than ``window_height``.
    """
    num_rows = spectrogram_data.shape[0]
    num_cols = spectrogram_data.shape[1]
    if num_rows > window_height:
        raise IndexError(
            f"spectrogram has {num_rows} bands but the patch only has "
            f"{window_height} rows"
        )

    deg_patch = np.zeros((window_height, window_width))

    # Range of frames that actually exist in the spectrogram.
    first_real_frame = max(0, window_beginning)
    last_real_frame = min(window_end, num_cols - 1)

    # Patch column where the first real frame lands; non-zero only when the
    # window starts before frame 0 (leading zero padding).
    dest_start = first_real_frame - window_beginning

    # Never copy more frames than exist, nor more than fit in the patch.
    num_real = min(last_real_frame - first_real_frame + 1,
                   window_width - dest_start)

    if num_real > 0:
        deg_patch[:num_rows, dest_start:dest_start + num_real] = (
            spectrogram_data[:, first_real_frame:first_real_frame + num_real]
        )

    return deg_patch
72
+
73
+
74
+ def _calc_max_num_patches(ref_patch_indices: List[int],
75
+ num_frames_in_deg: int,
76
+ num_frames_per_patch: int) -> int:
77
+ """
78
+ Calculate max number of patches that fit within degraded spectrogram.
79
+ Matches C++ ComparisonPatchesSelector::CalcMaxNumPatches.
80
+ """
81
+ num_patches = len(ref_patch_indices)
82
+ if num_patches > 0:
83
+ while (num_patches > 0 and
84
+ ref_patch_indices[num_patches - 1] -
85
+ math.floor(num_frames_per_patch / 2) > num_frames_in_deg):
86
+ num_patches -= 1
87
+ return num_patches
88
+
89
+
90
def find_most_optimal_deg_patches(
    ref_patches: List[np.ndarray],
    ref_patch_indices: List[int],
    spectrogram_data: np.ndarray,
    frame_duration: float,
    search_window_radius: int = 60,
) -> List[PatchSimilarityResult]:
    """
    Find the most optimal degraded patches using dynamic programming.

    Matches C++ ComparisonPatchesSelector::FindMostOptimalDegPatches.

    The forward pass fills, for every reference patch and every candidate
    degraded offset inside the search window, the best cumulative similarity
    reachable at that offset. The backward pass then follows the stored
    back-pointers from the best final offset to recover one degraded offset
    per reference patch.

    Args:
        ref_patches: List of reference patch matrices.
        ref_patch_indices: Start frame indices for reference patches.
        spectrogram_data: Full degraded spectrogram.
        frame_duration: Duration of one frame in seconds.
        search_window_radius: Search window radius in patch units.

    Returns:
        List of PatchSimilarityResult for each matched pair.

    Raises:
        ValueError: If no reference patches are given, or if none of them can
            be scored against the degraded spectrogram.
    """
    if len(ref_patches) == 0:
        raise ValueError("No reference patches were provided.")

    num_frames_per_patch = ref_patches[0].shape[1]
    num_bands = ref_patches[0].shape[0]
    num_frames_in_deg = spectrogram_data.shape[1]
    patch_duration = frame_duration * num_frames_per_patch
    search_window = search_window_radius * num_frames_per_patch

    num_patches = _calc_max_num_patches(
        ref_patch_indices, num_frames_in_deg, num_frames_per_patch
    )

    if num_patches == 0:
        raise ValueError(
            "Degraded file was too short, different, or misaligned to score "
            "any of the reference patches."
        )

    if num_patches < len(ref_patch_indices):
        logger.warning(
            "Dropping %d (of %d) reference patches due to degraded file "
            "being misaligned or too short.",
            len(ref_patch_indices) - num_patches, len(ref_patch_indices)
        )

    # Pre-build one degraded patch per possible start offset.
    deg_patches = [
        build_degraded_patch(
            spectrogram_data, offset,
            offset + num_frames_per_patch - 1,
            num_bands, num_frames_per_patch
        )
        for offset in range(num_frames_in_deg)
    ]

    # DP tables: cumulative score and the previous patch's chosen offset.
    cumulative_dp = np.full((len(ref_patch_indices), num_frames_in_deg),
                            0.0, dtype=np.float64)
    backtrace = np.full((len(ref_patch_indices), num_frames_in_deg),
                        -1, dtype=np.int64)

    # Forward pass
    for patch_index in range(num_patches):
        ref_frame_index = ref_patch_indices[patch_index]

        # `high` is already clamped to the last valid offset.
        low = max(0, ref_frame_index - search_window)
        high = min(num_frames_in_deg - 1, ref_frame_index + search_window)

        # Running maximum over the previous patch's scores. Eligible previous
        # offsets for a given slide_offset are [lower_limit, slide_offset - 1];
        # that range only grows as slide_offset increases, so each previous
        # offset is visited once instead of being rescanned every time.
        best_prev_score = -np.inf
        best_prev_offset = -1
        prev_scores = None
        next_candidate = 0
        if patch_index > 0:
            prev_scores = cumulative_dp[patch_index - 1]
            next_candidate = max(
                0, ref_patch_indices[patch_index - 1] - search_window
            )

        for slide_offset in range(low, high + 1):
            sim_result = measure_patch_similarity(
                ref_patches[patch_index], deg_patches[slide_offset]
            )
            sim_val = sim_result.similarity
            past_slide_offset = -1

            if prev_scores is not None:
                # Fold in the previous offsets that just became eligible.
                while next_candidate < slide_offset:
                    candidate_score = prev_scores[next_candidate]
                    # On ties the later offset wins (once any offset has been
                    # selected), matching a backwards scan that only accepts
                    # strictly greater scores.
                    if (candidate_score > best_prev_score or
                            (candidate_score == best_prev_score
                             and best_prev_offset >= 0)):
                        best_prev_score = candidate_score
                        best_prev_offset = next_candidate
                    next_candidate += 1

                past_slide_offset = best_prev_offset
                sim_val += best_prev_score

                # Packet loss handling: check if skipping current is better
                if prev_scores[slide_offset] > sim_val:
                    sim_val = prev_scores[slide_offset]
                    past_slide_offset = slide_offset

            cumulative_dp[patch_index, slide_offset] = sim_val
            backtrace[patch_index, slide_offset] = past_slide_offset

    # Find best ending offset
    last_index = num_patches - 1
    lower_limit = max(0, ref_patch_indices[last_index] - search_window)
    upper_limit = min(num_frames_in_deg - 1,
                      ref_patch_indices[last_index] + search_window)

    max_score = -np.inf
    last_offset = lower_limit
    for slide_offset in range(lower_limit, upper_limit + 1):
        if cumulative_dp[last_index, slide_offset] > max_score:
            max_score = cumulative_dp[last_index, slide_offset]
            last_offset = slide_offset

    # Backtrace to find best path
    best_deg_patches = [PatchSimilarityResult() for _ in range(num_patches)]

    for patch_index in range(num_patches - 1, -1, -1):
        ref_patch = ref_patches[patch_index]
        deg_patch = build_degraded_patch(
            spectrogram_data, last_offset,
            last_offset + num_frames_per_patch - 1,
            num_bands, num_frames_per_patch
        )

        sim_result = measure_patch_similarity(ref_patch, deg_patch)

        # A back-pointer equal to the current offset marks a patch that was
        # skipped in the forward pass (packet loss): report it as unmatched.
        if last_offset == backtrace[patch_index, last_offset]:
            sim_result.deg_patch_start_time = 0.0
            sim_result.deg_patch_end_time = 0.0
            sim_result.similarity = 0.0
            sim_result.freq_band_means = np.zeros(num_bands)
        else:
            sim_result.deg_patch_start_time = last_offset * frame_duration
            sim_result.deg_patch_end_time = (
                sim_result.deg_patch_start_time + patch_duration
            )

        sim_result.ref_patch_start_time = (
            ref_patch_indices[patch_index] * frame_duration
        )
        sim_result.ref_patch_end_time = (
            sim_result.ref_patch_start_time + patch_duration
        )

        best_deg_patches[patch_index] = sim_result
        last_offset = backtrace[patch_index, last_offset]

    return best_deg_patches
244
+
245
+
246
def slice_signal(signal: AudioSignal, start_time: float,
                 end_time: float) -> AudioSignal:
    """
    Slice an audio signal by time range.
    Matches C++ ComparisonPatchesSelector::Slice.

    Parts of the requested range that lie outside the signal are filled with
    silence: zeros are prepended for a negative ``start_time`` and appended
    when ``end_time`` extends past the last sample.

    Args:
        signal: Source signal; ``signal.data`` is sliced by sample index.
        start_time: Start of the slice in seconds (may be negative).
        end_time: End of the slice in seconds (may exceed the signal length).

    Returns:
        A new AudioSignal with the same sample rate as ``signal``.
    """
    sample_rate = signal.sample_rate
    num_samples = len(signal.data)

    start_index = max(0, int(start_time * sample_rate))
    end_index = min(num_samples - 1, int(end_time * sample_rate))
    # Keep the stop index from going below the start so a negative value is
    # never interpreted by numpy as "count from the end".
    end_index = max(start_index, end_index)

    # NOTE(review): end_index is used as an exclusive bound here; confirm
    # this matches the reference implementation's row selection.
    sliced = signal.data[start_index:end_index].copy()

    # Add silence at end if needed: the missing samples come after the audio.
    end_time_diff = end_time * sample_rate - num_samples
    if end_time_diff > 0:
        sliced = np.concatenate([sliced, np.zeros(int(end_time_diff))])

    # Add silence at beginning if needed
    if start_time < 0:
        pre_silence = np.zeros(int(-start_time * sample_rate))
        sliced = np.concatenate([pre_silence, sliced])

    return AudioSignal(sliced, sample_rate)
269
+
270
+
271
def finely_align_and_recreate_patches(
    sim_results: List[PatchSimilarityResult],
    ref_signal: AudioSignal,
    deg_signal: AudioSignal,
    spect_builder: GammatoneSpectrogramBuilder,
    window: AnalysisWindow,
) -> List[PatchSimilarityResult]:
    """
    Fine-align each matched patch pair in the time domain.

    Matches C++ ComparisonPatchesSelector::FinelyAlignAndRecreatePatches.

    For each matched pair:
    1. Extract audio sub-signals
    2. Re-align at fine granularity
    3. Rebuild spectrograms
    4. Recompute NSIM
    5. Keep the better result (original or re-aligned)

    Re-alignment is best effort: if alignment or spectrogram building fails
    for a patch, the failure is logged at debug level and the original result
    for that patch is kept.

    Args:
        sim_results: Coarse matches, one per reference patch.
        ref_signal: Full reference signal.
        deg_signal: Full degraded signal.
        spect_builder: Builder used to recompute the patch spectrograms.
        window: Analysis window; patches with at most ``window.size`` samples
            after alignment are left untouched.

    Returns:
        A new list of the same length as ``sim_results``; entries are either
        the original result or its re-aligned replacement.
    """
    # Shallow copy: entries are replaced only where re-alignment is kept.
    realigned_results = list(sim_results)

    for i, sim_result in enumerate(sim_results):
        # Skip packet-loss patches (start and end time both zero).
        if (sim_result.deg_patch_start_time == sim_result.deg_patch_end_time
                and sim_result.deg_patch_start_time == 0.0):
            continue

        # 1. Extract audio segments
        ref_audio = slice_signal(ref_signal,
                                 sim_result.ref_patch_start_time,
                                 sim_result.ref_patch_end_time)
        deg_audio = slice_signal(deg_signal,
                                 sim_result.deg_patch_start_time,
                                 sim_result.deg_patch_end_time)

        # 2. Fine alignment
        try:
            ref_aligned, deg_aligned, lag = align_and_truncate(
                ref_audio, deg_audio
            )
        except Exception:
            logger.debug(
                "Fine alignment failed for patch %d; keeping coarse match.",
                i, exc_info=True
            )
            continue

        # Check we have enough samples
        if (len(ref_aligned.data) <= window.size or
                len(deg_aligned.data) <= window.size):
            continue

        # 3. Rebuild spectrograms
        try:
            ref_spec = spect_builder.build(ref_aligned, window)
            deg_spec = spect_builder.build(deg_aligned, window)
        except Exception:
            logger.debug(
                "Spectrogram rebuild failed for patch %d; keeping coarse "
                "match.", i, exc_info=True
            )
            continue

        ref_db, deg_db = prepare_spectrograms_for_comparison(ref_spec, deg_spec)

        # 4. Recompute NSIM
        new_sim = measure_patch_similarity(ref_db, deg_db)

        # 5. Keep better result: a strictly worse re-alignment is discarded.
        if new_sim.similarity < sim_result.similarity:
            continue

        new_ref_duration = ref_aligned.duration
        new_deg_duration = deg_aligned.duration

        # NOTE(review): lag is added directly to times in seconds, so it is
        # presumably expressed in seconds; confirm against align_and_truncate.
        if lag > 0:
            new_sim.ref_patch_start_time = (
                sim_result.ref_patch_start_time + lag
            )
            new_sim.deg_patch_start_time = sim_result.deg_patch_start_time
        else:
            new_sim.ref_patch_start_time = sim_result.ref_patch_start_time
            new_sim.deg_patch_start_time = (
                sim_result.deg_patch_start_time - lag
            )

        new_sim.ref_patch_end_time = (
            new_sim.ref_patch_start_time + new_ref_duration
        )
        new_sim.deg_patch_end_time = (
            new_sim.deg_patch_start_time + new_deg_duration
        )
        realigned_results[i] = new_sim

    return realigned_results
@@ -0,0 +1,114 @@
1
+ """
2
+ Quality mappers: SVR (Audio mode) and Exponential (Speech mode).
3
+
4
+ Corresponds to C++ files:
5
+ - svr_similarity_to_quality_mapper.cc
6
+ - speech_similarity_to_quality_mapper.cc
7
+ - support_vector_regression_model.cc
8
+ """
9
+
10
+ import os
11
+ import logging
12
+ import numpy as np
13
+ from abc import ABC, abstractmethod
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class SimilarityToQualityMapper(ABC):
    """Common interface for mappers that turn similarity into a quality score."""

    @abstractmethod
    def init(self) -> None:
        """Prepare the mapper for use (e.g. load a model)."""
        return None

    @abstractmethod
    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """Return the predicted MOS quality for the given NSIM feature vectors."""
        return None
33
+
34
+
35
class SvrSimilarityToQualityMapper(SimilarityToQualityMapper):
    """
    SVR-based quality mapper for Audio mode.
    Uses libsvm to load and predict from pre-trained SVR model.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        # Populated by init(); predict_quality() refuses to run while None.
        self.model = None

    @staticmethod
    def _import_libsvm():
        """
        Import libsvm's helper functions, trying both known module layouts.

        Returns:
            Tuple ``(svm_load_model, svm_predict)``.

        Raises:
            ImportError: If libsvm cannot be imported under either name.
        """
        try:
            from svmutil import svm_load_model, svm_predict
        except ImportError:
            # Try alternative import paths
            try:
                from libsvm.svmutil import svm_load_model, svm_predict
            except ImportError as err:
                raise ImportError(
                    "libsvm is required for Audio mode SVR quality mapping. "
                    "Install with: pip install libsvm-official"
                ) from err
        return svm_load_model, svm_predict

    def init(self):
        """
        Load the libsvm model file.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            ImportError: If libsvm is not installed.
            ValueError: If libsvm could not load a model from the file.
        """
        if not os.path.exists(self.model_path):
            raise FileNotFoundError(
                f"SVR model file not found: {self.model_path}"
            )

        svm_load_model, _ = self._import_libsvm()
        model = svm_load_model(self.model_path)
        # libsvm's Python wrapper reports a failed load by returning None
        # rather than raising, so surface it here instead of at predict time.
        if model is None:
            raise ValueError(
                f"Failed to load SVR model from: {self.model_path}"
            )
        self.model = model

    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """
        Predict MOS using SVR model.
        Only fvnsim is used as input features.

        Returns:
            The predicted value clipped to the range [1.0, 5.0].

        Raises:
            RuntimeError: If init() has not loaded a model yet.
            ImportError: If libsvm is not installed.
        """
        if self.model is None:
            raise RuntimeError(
                "SVR model is not loaded; call init() before predict_quality()."
            )

        _, svm_predict = self._import_libsvm()

        # Convert to libsvm format: {1: val1, 2: val2, ...}
        x = {idx: float(v) for idx, v in enumerate(fvnsim, start=1)}
        # svm_predict returns (predicted_labels, (MSE, SCC, ...), decision_values)
        predicted_labels, _, _ = svm_predict([0], [x], self.model, '-q')
        return float(np.clip(predicted_labels[0], 1.0, 5.0))
84
+
85
+
86
class SpeechSimilarityToQualityMapper(SimilarityToQualityMapper):
    """
    Quality mapper for Speech mode based on an exponential fit.
    Uses hardcoded parameters fitted on TCD-VOIP dataset.
    """

    # Fitted parameters (from C++ speech_similarity_to_quality_mapper.cc)
    FIT_A = -262.847869
    FIT_B = 0.0154302525
    FIT_X0 = -361.063949
    FIT_SCALE = 1.245063

    def __init__(self, scale_to_max_mos: bool = True):
        # Without scaling the fitted curve is used as-is (factor 1.0).
        if scale_to_max_mos:
            self.scale = self.FIT_SCALE
        else:
            self.scale = 1.0

    def init(self):
        """Nothing to load: the exponential mapper has no external state."""
        return None

    def predict_quality(self, fvnsim: np.ndarray,
                        fvnsim10: np.ndarray = None,
                        fstdnsim: np.ndarray = None,
                        fvdegenergy: np.ndarray = None) -> float:
        """
        Predict MOS from the mean of fvnsim via a + exp(b * (x - x0)).

        The result is multiplied by ``self.scale`` and clipped to [1.0, 5.0].
        Only fvnsim is read; the other feature vectors are ignored.
        """
        mean_nsim = float(np.mean(fvnsim))
        exponent = self.FIT_B * (mean_nsim - self.FIT_X0)
        raw_mos = self.FIT_A + np.exp(exponent)
        scaled_mos = raw_mos * self.scale
        return float(np.clip(scaled_mos, 1.0, 5.0))
visqol/signal_utils.py ADDED
@@ -0,0 +1,83 @@
1
+ """
2
+ Signal processing utilities: envelope, cross-correlation, normalization.
3
+
4
+ Corresponds to C++ files: envelope.cc, xcorr.cc, misc_math.cc
5
+ """
6
+
7
+ import numpy as np
8
+ from scipy import signal as scipy_signal
9
+ from scipy.fft import fft, ifft
10
+
11
+
12
def upper_envelope(sig: np.ndarray) -> np.ndarray:
    """
    Compute the upper amplitude envelope of a signal via the Hilbert transform.
    Matches C++ Envelope::CalcUpperEnv:
    1. Subtract the mean to center the signal
    2. Form the analytic signal with the Hilbert transform
    3. Take its magnitude (amplitude envelope)
    4. Add the mean back
    """
    offset = np.mean(sig)
    magnitude = np.abs(scipy_signal.hilbert(sig - offset))
    return magnitude + offset
26
+
27
+
28
def find_best_lag(ref: np.ndarray, deg: np.ndarray) -> int:
    """
    Find the lag that maximizes cross-correlation between two signals.

    The correlation evaluated at lag ``k`` is ``sum_n ref[n + k] * deg[n]``.
    A positive result therefore means ``ref`` is delayed relative to ``deg``
    (the matching content appears ``k`` samples later in ``ref``); a negative
    result means ``deg`` is delayed relative to ``ref``. When several lags
    share the maximum, the lowest lag is returned.

    Matches C++ XCorr::FindLowestLagIndex which uses FFT-based cross-correlation.

    Args:
        ref: Reference signal (1-D).
        deg: Degraded signal (1-D); may differ in length from ``ref``.

    Returns:
        The best lag in samples, in the range [-(n - 1), n - 1] where ``n`` is
        the longer of the two input lengths.

    Raises:
        ValueError: If both inputs are empty.
    """
    n = max(len(ref), len(deg))
    if n == 0:
        raise ValueError("Cannot compute the best lag of two empty signals.")
    max_lag = n - 1

    # Pad to same length
    ref_padded = np.zeros(n)
    deg_padded = np.zeros(n)
    ref_padded[:len(ref)] = ref
    deg_padded[:len(deg)] = deg

    # fft_points = next power of 2 >= 2*n - 1, so the circular correlation
    # has room for every positive and negative lag without overlap.
    fft_points = 1
    while fft_points < 2 * n - 1:
        fft_points *= 2

    fft_ref = fft(ref_padded, n=fft_points)
    fft_deg = fft(deg_padded, n=fft_points)
    xcorr_full = np.real(ifft(fft_ref * np.conj(fft_deg)))

    # Lay the lags out in ascending order: [-max_lag .. -1, 0 .. max_lag].
    # Negative lags live in the last max_lag entries; an explicit start index
    # is used because a "-0" slice start would select the whole array.
    corrs = np.concatenate(
        (xcorr_full[fft_points - max_lag:], xcorr_full[:max_lag + 1])
    )

    return int(np.argmax(corrs)) - max_lag
64
+
65
+
66
def normalize(mat: np.ndarray) -> np.ndarray:
    """
    Rescale a matrix/vector linearly so its values span [0, 1].
    Matches C++ MiscMath::Normalize.

    When every element has the same value there is no range to scale by, and
    an all-zero array shaped like the input is returned instead.
    """
    lowest = np.min(mat)
    highest = np.max(mat)
    if highest != lowest:
        return (mat - lowest) / (highest - lowest)
    return np.zeros_like(mat)
76
+
77
+
78
def exponential_from_fit(x: float, a: float, b: float, x0: float) -> float:
    """
    Evaluate the fitted exponential a + exp(b * (x - x0)) at x.
    Matches C++ MiscMath::ExponentialFromFit.
    """
    exponent = b * (x - x0)
    return a + np.exp(exponent)