visqol-python 3.3.3 (visqol_python-3.3.3-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
visqol/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """
2
+ ViSQOL - Virtual Speech Quality Objective Listener (Pure Python Implementation)
3
+
4
+ A pure Python port of Google's ViSQOL v3.3.3 for objective audio quality assessment.
5
+ Compares a reference audio signal with a degraded version and outputs a MOS-LQO score (1-5).
6
+
7
+ Usage:
8
+ from visqol import VisqolApi
9
+
10
+ api = VisqolApi()
11
+ api.create(mode="audio")
12
+ result = api.measure("reference.wav", "degraded.wav")
13
+ print(f"MOS-LQO: {result.moslqo}")
14
+ """
15
+
16
+ __version__ = "3.3.3"
17
+
18
+ from visqol.api import VisqolApi
19
+
20
+ __all__ = ["VisqolApi"]
visqol/__main__.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ ViSQOL command-line interface.
3
+
4
+ Usage:
5
+ python -m visqol --reference ref.wav --degraded deg.wav [--speech_mode]
6
+ """
7
+
8
+ import argparse
9
+ import sys
10
+ import logging
11
+
12
+ from visqol.api import VisqolApi
13
+
14
+
15
+ def main():
16
+ parser = argparse.ArgumentParser(
17
+ description="ViSQOL - Virtual Speech Quality Objective Listener (Python)"
18
+ )
19
+ parser.add_argument(
20
+ "--reference", "-r", required=True,
21
+ help="Path to reference audio file (WAV)"
22
+ )
23
+ parser.add_argument(
24
+ "--degraded", "-d", required=True,
25
+ help="Path to degraded audio file (WAV)"
26
+ )
27
+ parser.add_argument(
28
+ "--speech_mode", action="store_true",
29
+ help="Use speech mode (16kHz, exponential mapping)"
30
+ )
31
+ parser.add_argument(
32
+ "--model", default=None,
33
+ help="Path to SVR model file (Audio mode only)"
34
+ )
35
+ parser.add_argument(
36
+ "--search_window", type=int, default=60,
37
+ help="Search window radius (default: 60)"
38
+ )
39
+ parser.add_argument(
40
+ "--unscaled_speech", action="store_true",
41
+ help="Don't scale speech MOS to max 5.0"
42
+ )
43
+ parser.add_argument(
44
+ "--no_alignment", action="store_true",
45
+ help="Disable global alignment"
46
+ )
47
+ parser.add_argument(
48
+ "--no_realignment", action="store_true",
49
+ help="Disable fine realignment"
50
+ )
51
+ parser.add_argument(
52
+ "--verbose", "-v", action="store_true",
53
+ help="Enable verbose output"
54
+ )
55
+
56
+ args = parser.parse_args()
57
+
58
+ # Setup logging
59
+ level = logging.DEBUG if args.verbose else logging.WARNING
60
+ logging.basicConfig(level=level, format="%(levelname)s: %(message)s")
61
+
62
+ # Run ViSQOL
63
+ mode = "speech" if args.speech_mode else "audio"
64
+ api = VisqolApi()
65
+ api.create(
66
+ mode=mode,
67
+ model_path=args.model,
68
+ search_window=args.search_window,
69
+ use_unscaled_speech=args.unscaled_speech,
70
+ disable_global_alignment=args.no_alignment,
71
+ disable_realignment=args.no_realignment,
72
+ )
73
+
74
+ result = api.measure(args.reference, args.degraded)
75
+
76
+ # Output results
77
+ print(f"MOS-LQO: {result.moslqo:.6f}")
78
+ print(f"VNSIM: {result.vnsim:.6f}")
79
+ if args.verbose:
80
+ print(f"FVNSIM: {result.fvnsim}")
81
+ print(f"FVNSIM10: {result.fvnsim10}")
82
+ print(f"FSTDNSIM: {result.fstdnsim}")
83
+ print(f"FVDEGENERGY: {result.fvdegenergy}")
84
+ print(f"Patches: {len(result.patch_sims)}")
85
+ for i, p in enumerate(result.patch_sims):
86
+ print(f" Patch {i}: sim={p.similarity:.4f} "
87
+ f"ref=[{p.ref_patch_start_time:.3f}-{p.ref_patch_end_time:.3f}] "
88
+ f"deg=[{p.deg_patch_start_time:.3f}-{p.deg_patch_end_time:.3f}]")
89
+
90
+
91
+ if __name__ == "__main__":
92
+ main()
visqol/alignment.py ADDED
@@ -0,0 +1,82 @@
1
+ """
2
+ Global signal alignment using upper envelope cross-correlation.
3
+
4
+ Corresponds to C++ file: alignment.cc
5
+ """
6
+
7
+ import numpy as np
8
+ from visqol.audio_utils import AudioSignal
9
+ from visqol.signal_utils import upper_envelope, find_best_lag
10
+
11
+
12
+ def globally_align(reference: AudioSignal, degraded: AudioSignal) -> tuple:
13
+ """
14
+ Globally align degraded signal to reference signal.
15
+
16
+ Uses upper envelope cross-correlation to find the best time-domain lag,
17
+ then shifts the degraded signal accordingly.
18
+
19
+ Matches C++ Alignment::GloballyAlign.
20
+
21
+ Args:
22
+ reference: Reference audio signal.
23
+ degraded: Degraded audio signal.
24
+
25
+ Returns:
26
+ Tuple of (aligned_degraded: AudioSignal, lag_seconds: float).
27
+ """
28
+ ref_env = upper_envelope(reference.data)
29
+ deg_env = upper_envelope(degraded.data)
30
+
31
+ best_lag = find_best_lag(ref_env, deg_env)
32
+
33
+ # Limit lag to half the reference length
34
+ if best_lag == 0 or abs(best_lag) > len(reference.data) / 2.0:
35
+ return degraded, 0.0
36
+
37
+ if best_lag < 0:
38
+ # Degraded comes before reference: truncate front of degraded
39
+ new_data = degraded.data[abs(best_lag):]
40
+ else:
41
+ # Reference comes before degraded: prepend zeros to degraded
42
+ new_data = np.concatenate([np.zeros(best_lag), degraded.data])
43
+
44
+ aligned_signal = AudioSignal(new_data, degraded.sample_rate)
45
+ lag_seconds = best_lag / float(degraded.sample_rate)
46
+ return aligned_signal, lag_seconds
47
+
48
+
49
+ def align_and_truncate(reference: AudioSignal,
50
+ degraded: AudioSignal) -> tuple:
51
+ """
52
+ Align and truncate signals to the same length.
53
+
54
+ Matches C++ Alignment::AlignAndTruncate.
55
+
56
+ Returns:
57
+ Tuple of (aligned_ref: AudioSignal, aligned_deg: AudioSignal, lag_seconds: float).
58
+ """
59
+ aligned_deg, lag_seconds = globally_align(reference, degraded)
60
+
61
+ ref_data = reference.data
62
+ deg_data = aligned_deg.data
63
+
64
+ if len(ref_data) > len(deg_data):
65
+ ref_data = ref_data[:len(deg_data)]
66
+ elif len(ref_data) < len(deg_data):
67
+ # For positive lag, the beginning of ref aligns with zeros
68
+ lag_samples = int(lag_seconds * reference.sample_rate)
69
+ if lag_samples > 0:
70
+ ref_data = ref_data[lag_samples:]
71
+ deg_data = deg_data[lag_samples:lag_samples + len(ref_data)]
72
+ else:
73
+ deg_data = deg_data[:len(ref_data)]
74
+
75
+ # Ensure same length
76
+ min_len = min(len(ref_data), len(deg_data))
77
+ ref_data = ref_data[:min_len]
78
+ deg_data = deg_data[:min_len]
79
+
80
+ return (AudioSignal(ref_data, reference.sample_rate),
81
+ AudioSignal(deg_data, degraded.sample_rate),
82
+ lag_seconds)
@@ -0,0 +1,52 @@
1
+ """
2
+ Analysis window for spectrogram construction.
3
+
4
+ Corresponds to C++ files: analysis_window.cc/h
5
+ """
6
+
7
+ import numpy as np
8
+
9
+
10
+ class AnalysisWindow:
11
+ """
12
+ Analysis window used for spectrogram frame windowing.
13
+
14
+ Attributes:
15
+ size: Window size in samples.
16
+ overlap: Overlap ratio (e.g. 0.25 means 25% of window size as hop).
17
+ window_duration: Duration of window in seconds.
18
+ """
19
+
20
+ def __init__(self, sample_rate: int, overlap: float = 0.25,
21
+ window_duration: float = 0.08):
22
+ """
23
+ Args:
24
+ sample_rate: Sample rate of the audio signal.
25
+ overlap: Overlap as a fraction of window size (used as hop = size * overlap).
26
+ window_duration: Duration of the analysis window in seconds.
27
+ """
28
+ self.window_duration = window_duration
29
+ self.overlap = overlap
30
+ self.size = int(round(sample_rate * window_duration))
31
+ self._hann_window = None
32
+
33
+ @property
34
+ def hop_size(self) -> int:
35
+ """Hop size = window_size * overlap."""
36
+ return int(self.size * self.overlap)
37
+
38
+ @property
39
+ def hann_window(self) -> np.ndarray:
40
+ """Precomputed Hann window."""
41
+ if self._hann_window is None:
42
+ # Match C++ exactly: 0.5 - 0.5 * cos(2*pi*i/(size-1))
43
+ n = self.size
44
+ self._hann_window = 0.5 - 0.5 * np.cos(
45
+ 2.0 * np.pi * np.arange(n) / (n - 1)
46
+ )
47
+ return self._hann_window
48
+
49
+ def apply_hann_window(self, frame: np.ndarray) -> np.ndarray:
50
+ """Apply Hann window to a frame."""
51
+ assert len(frame) == self.size
52
+ return frame * self.hann_window
visqol/api.py ADDED
@@ -0,0 +1,110 @@
1
+ """
2
+ ViSQOL public API.
3
+
4
+ Provides a simple interface for comparing audio quality.
5
+
6
+ Corresponds to C++ file: visqol_api.cc
7
+ """
8
+
9
+ import os
10
+ import numpy as np
11
+ from typing import Optional
12
+
13
+ from visqol.audio_utils import AudioSignal
14
+ from visqol.visqol_manager import VisqolManager
15
+ from visqol.visqol_core import SimilarityResult
16
+
17
+
18
+ # Default SVR model path (bundled inside the package)
19
+ _DEFAULT_MODEL_DIR = os.path.join(os.path.dirname(__file__), "model")
20
+ _DEFAULT_SVR_MODEL = os.path.join(_DEFAULT_MODEL_DIR, "libsvm_nu_svr_model.txt")
21
+
22
+
23
+ class VisqolApi:
24
+ """
25
+ Public API for ViSQOL audio quality assessment.
26
+
27
+ Usage:
28
+ api = VisqolApi()
29
+ api.create(mode="audio")
30
+ result = api.measure("reference.wav", "degraded.wav")
31
+ print(f"MOS-LQO: {result.moslqo}")
32
+ """
33
+
34
+ def __init__(self):
35
+ self._manager = VisqolManager()
36
+ self._is_created = False
37
+
38
+ def create(self,
39
+ mode: str = "audio",
40
+ model_path: Optional[str] = None,
41
+ search_window: int = 60,
42
+ use_unscaled_speech: bool = False,
43
+ disable_global_alignment: bool = False,
44
+ disable_realignment: bool = False):
45
+ """
46
+ Initialize ViSQOL with the specified configuration.
47
+
48
+ Args:
49
+ mode: "audio" for music/general audio (48kHz, SVR model) or
50
+ "speech" for speech signals (16kHz, exponential fit).
51
+ model_path: Path to SVR model file (Audio mode only).
52
+ If None, uses the bundled default model.
53
+ search_window: Search window radius (default 60).
54
+ use_unscaled_speech: If True, don't scale speech MOS to 5.0.
55
+ disable_global_alignment: Skip global alignment step.
56
+ disable_realignment: Skip fine realignment step.
57
+ """
58
+ use_speech_mode = mode.lower() == "speech"
59
+
60
+ if not use_speech_mode and model_path is None:
61
+ model_path = _DEFAULT_SVR_MODEL
62
+
63
+ self._manager.init(
64
+ model_path=model_path or "",
65
+ use_speech_mode=use_speech_mode,
66
+ use_unscaled_speech=use_unscaled_speech,
67
+ search_window=search_window,
68
+ disable_global_alignment=disable_global_alignment,
69
+ disable_realignment=disable_realignment,
70
+ )
71
+ self._is_created = True
72
+
73
+ def measure(self, ref_path: str, deg_path: str) -> SimilarityResult:
74
+ """
75
+ Compare two audio files and return quality assessment.
76
+
77
+ Args:
78
+ ref_path: Path to reference audio file (WAV).
79
+ deg_path: Path to degraded audio file (WAV).
80
+
81
+ Returns:
82
+ SimilarityResult containing MOS-LQO score and detailed results.
83
+ """
84
+ if not self._is_created:
85
+ raise RuntimeError(
86
+ "VisqolApi must be created (call .create()) before measuring."
87
+ )
88
+ return self._manager.run(ref_path, deg_path)
89
+
90
+ def measure_from_arrays(self, ref_array: np.ndarray,
91
+ deg_array: np.ndarray,
92
+ sample_rate: int) -> SimilarityResult:
93
+ """
94
+ Compare two audio signals from numpy arrays.
95
+
96
+ Args:
97
+ ref_array: Reference audio signal (1D numpy array).
98
+ deg_array: Degraded audio signal (1D numpy array).
99
+ sample_rate: Sample rate of both signals.
100
+
101
+ Returns:
102
+ SimilarityResult containing MOS-LQO score and detailed results.
103
+ """
104
+ if not self._is_created:
105
+ raise RuntimeError(
106
+ "VisqolApi must be created (call .create()) before measuring."
107
+ )
108
+ ref_signal = AudioSignal(ref_array, sample_rate)
109
+ deg_signal = AudioSignal(deg_array, sample_rate)
110
+ return self._manager.run_from_signals(ref_signal, deg_signal)
visqol/audio_utils.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Audio utilities: WAV loading, SPL calculation, mono conversion.
3
+
4
+ Corresponds to C++ files: wav_reader.cc, misc_audio.cc (partial)
5
+ """
6
+
7
+ import logging
8
+ import numpy as np
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Sound pressure level reference point (20 µPa)
13
+ SPL_REFERENCE_POINT = 2e-5
14
+
15
+
16
+ class AudioSignal:
17
+ """Container for audio signal data."""
18
+
19
+ def __init__(self, data: np.ndarray, sample_rate: int):
20
+ """
21
+ Args:
22
+ data: 1D numpy array of audio samples (mono), float64.
23
+ sample_rate: Sample rate in Hz.
24
+ """
25
+ self.data = np.asarray(data, dtype=np.float64).ravel()
26
+ self.sample_rate = int(sample_rate)
27
+
28
+ @property
29
+ def duration(self) -> float:
30
+ """Duration in seconds."""
31
+ return len(self.data) / self.sample_rate
32
+
33
+ @property
34
+ def num_samples(self) -> int:
35
+ return len(self.data)
36
+
37
+ def __len__(self):
38
+ return len(self.data)
39
+
40
+
41
+ def load_audio(path: str):
42
+ """
43
+ Load a WAV file and return (data, sample_rate).
44
+ Data is normalized to float64 range [-1, 1].
45
+ """
46
+ import soundfile as sf
47
+ data, sr = sf.read(path, dtype='float64', always_2d=True)
48
+ return data, sr
49
+
50
+
51
+ def to_mono(data: np.ndarray) -> np.ndarray:
52
+ """Convert multi-channel audio to mono by averaging channels."""
53
+ if data.ndim == 2 and data.shape[1] > 1:
54
+ return np.mean(data, axis=1)
55
+ elif data.ndim == 2:
56
+ return data[:, 0]
57
+ return data
58
+
59
+
60
+ def load_as_mono(path: str) -> AudioSignal:
61
+ """Load a WAV file as mono AudioSignal."""
62
+ data, sr = load_audio(path)
63
+ mono_data = to_mono(data)
64
+ return AudioSignal(mono_data, sr)
65
+
66
+
67
+ def calc_sound_pressure_level(signal: AudioSignal) -> float:
68
+ """
69
+ Calculate sound pressure level (dB SPL).
70
+ SPL = 20 * log10(rms / reference_point)
71
+ """
72
+ data = signal.data
73
+ rms = np.sqrt(np.mean(data ** 2))
74
+ if rms == 0:
75
+ return -np.inf
76
+ return 20.0 * np.log10(rms / SPL_REFERENCE_POINT)
77
+
78
+
79
+ def scale_to_match_sound_pressure_level(
80
+ reference: AudioSignal, degraded: AudioSignal
81
+ ) -> AudioSignal:
82
+ """
83
+ Scale the degraded signal to match the SPL of the reference signal.
84
+ Returns a new AudioSignal with scaled data.
85
+ """
86
+ ref_spl = calc_sound_pressure_level(reference)
87
+ deg_spl = calc_sound_pressure_level(degraded)
88
+ scale_factor = 10.0 ** ((ref_spl - deg_spl) / 20.0)
89
+ scaled_data = degraded.data * scale_factor
90
+ return AudioSignal(scaled_data, degraded.sample_rate)