PyPI - ssbc - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ssbc 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

ssbc/__init__.py +59 -0
ssbc/__main__.py +4 -0
ssbc/cli.py +21 -0
ssbc/conformal.py +333 -0
ssbc/core.py +205 -0
ssbc/hyperparameter.py +258 -0
ssbc/simulation.py +148 -0
ssbc/ssbc.py +1 -0
ssbc/statistics.py +158 -0
ssbc/utils.py +2 -0
ssbc/visualization.py +459 -0
ssbc-0.1.0.dist-info/METADATA +266 -0
ssbc-0.1.0.dist-info/RECORD +17 -0
ssbc-0.1.0.dist-info/WHEEL +5 -0
ssbc-0.1.0.dist-info/entry_points.txt +2 -0
ssbc-0.1.0.dist-info/licenses/LICENSE +21 -0
ssbc-0.1.0.dist-info/top_level.txt +1 -0

ssbc/__init__.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""Top-level package for SSBC (Small-Sample Beta Correction)."""
+# Package metadata
+__author__ = """Petrus H Zwart"""
+__email__ = "phzwart@lbl.gov"
+__version__ = "0.1.0"
+# The imports below re-export the public API of each submodule at package level.
+# Conformal prediction
+from .conformal import (
+    mondrian_conformal_calibrate,
+    split_by_class,
+)
+# Core SSBC algorithm
+from .core import (
+    SSBCResult,
+    ssbc_correct,
+)
+# Hyperparameter tuning
+from .hyperparameter import (
+    sweep_and_plot_parallel_plotly,
+    sweep_hyperparams_and_collect,
+)
+# Simulation (for testing and examples)
+from .simulation import (
+    BinaryClassifierSimulator,
+)
+# Statistics utilities
+from .statistics import (
+    clopper_pearson_intervals,
+    cp_interval,
+)
+# Visualization and reporting
+from .visualization import (
+    plot_parallel_coordinates_plotly,
+    report_prediction_stats,
+)
+# Names exported by ``from ssbc import *``, grouped by the submodule they come from.
+__all__ = [
+    # Core
+    "SSBCResult",
+    "ssbc_correct",
+    # Conformal
+    "mondrian_conformal_calibrate",
+    "split_by_class",
+    # Statistics
+    "clopper_pearson_intervals",
+    "cp_interval",
+    # Simulation
+    "BinaryClassifierSimulator",
+    # Visualization
+    "report_prediction_stats",
+    "plot_parallel_coordinates_plotly",
+    # Hyperparameter
+    "sweep_hyperparams_and_collect",
+    "sweep_and_plot_parallel_plotly",
+]

ssbc/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+# Entry point for ``python -m ssbc``: hands control to the Typer app defined in ssbc.cli.
+from .cli import app
+# Guarded so that merely importing this module does not start the CLI.
+if __name__ == "__main__":
+    app()

ssbc/cli.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Console script for ssbc."""
+import typer
+from rich.console import Console
+from ssbc import utils
+# Shared rich console used for all CLI output.
+console = Console()
+# Typer application object; commands register themselves on it via ``@app.command()``.
+app = typer.Typer()
+@app.command()
+def main():
+    """Console script for ssbc."""
+    # NOTE: the docstring above doubles as the command's help text, so it is kept verbatim.
+    placeholder_lines = (
+        "Replace this message by putting your code into ssbc.cli.main",
+        "See Typer documentation at https://typer.tiangolo.com/",
+    )
+    for line in placeholder_lines:
+        console.print(line)
+    utils.do_something_useful()
+if __name__ == "__main__":
+    app()

ssbc/conformal.py ADDED Viewed

@@ -0,0 +1,333 @@
+"""Mondrian conformal prediction with SSBC correction."""
+from typing import Any, Literal
+import numpy as np
+from .core import ssbc_correct
+from .statistics import cp_interval
+def split_by_class(labels: np.ndarray, probs: np.ndarray) -> dict[int, dict[str, Any]]:
+    """Split calibration data by true class for Mondrian conformal prediction.
+    Parameters
+    ----------
+    labels : np.ndarray, shape (n,)
+        True binary labels (0 or 1). Any array-like is accepted and converted
+        with ``np.asarray``.
+    probs : np.ndarray, shape (n, 2)
+        Classification probabilities [P(class=0), P(class=1)]. Any array-like
+        is accepted and converted with ``np.asarray``.
+    Returns
+    -------
+    dict
+        Dictionary with keys 0 and 1, each containing:
+        - 'labels': labels for this class (all same value)
+        - 'probs': probabilities for samples in this class
+        - 'indices': original indices (for tracking)
+        - 'n': number of samples in this class, as a built-in ``int``
+    Examples
+    --------
+    >>> labels = np.array([0, 1, 0, 1])
+    >>> probs = np.array([[0.8, 0.2], [0.3, 0.7], [0.9, 0.1], [0.2, 0.8]])
+    >>> class_data = split_by_class(labels, probs)
+    >>> print(class_data[0]['n'])  # Number of class 0 samples
+    2
+    """
+    # Plain Python sequences do not broadcast ``==``: ``[0, 1] == 0`` is just False,
+    # which would then be used as index 0. Converting first makes the mask elementwise.
+    labels = np.asarray(labels)
+    probs = np.asarray(probs)
+    class_data: dict[int, dict[str, Any]] = {}
+    for label in (0, 1):
+        mask = labels == label
+        class_data[label] = {
+            "labels": labels[mask],
+            "probs": probs[mask],
+            "indices": np.where(mask)[0],
+            # Built-in int (not a NumPy scalar) so the count serialises and prints cleanly.
+            "n": int(np.count_nonzero(mask)),
+        }
+    return class_data
+def _per_class_setting(value: float | dict[int, float], name: str) -> dict[int, float]:
+    """Expand a scalar or per-class dict into a ``{label: float}`` mapping.
+    A scalar applies to both classes; a dict is copied with its values cast to float.
+    Raises ``TypeError`` for anything else. An explicit raise is used because
+    ``assert`` statements are removed when Python runs with ``-O``.
+    """
+    if isinstance(value, int | float):
+        return {0: float(value), 1: float(value)}
+    if not isinstance(value, dict):
+        raise TypeError(f"{name} must be dict if not scalar")
+    return {k: float(v) for k, v in value.items()}
+def _build_prediction_sets(probs: np.ndarray, threshold_0: float, threshold_1: float) -> list[list[int]]:
+    """Return one prediction set per row: every label whose score ``1 - P(label)`` is within its threshold."""
+    prediction_sets = []
+    for row in probs:
+        pred_set = []
+        if 1.0 - row[0] <= threshold_0:
+            pred_set.append(0)
+        if 1.0 - row[1] <= threshold_1:
+            pred_set.append(1)
+        prediction_sets.append(pred_set)
+    return prediction_sets
+def mondrian_conformal_calibrate(
+    class_data: dict[int, dict[str, Any]],
+    alpha_target: float | dict[int, float],
+    delta: float | dict[int, float],
+    mode: Literal["beta", "beta-binomial"] = "beta",
+    m: int | None = None,
+) -> tuple[dict[int, dict[str, Any]], dict[Any, Any]]:
+    """Perform Mondrian (per-class) conformal calibration with SSBC correction.
+    For each class, compute:
+    1. Nonconformity scores: s(x, y) = 1 - P(y|x)
+    2. SSBC-corrected alpha for PAC guarantee
+    3. Conformal quantile threshold
+    4. Singleton error rate bounds via PAC guarantee
+    Then evaluate prediction set sizes on calibration data PER CLASS and MARGINALLY.
+    Parameters
+    ----------
+    class_data : dict
+        Output from split_by_class()
+    alpha_target : float or dict
+        Target miscoverage rate for each class
+        If float: same for both classes
+        If dict: {0: α0, 1: α1} for per-class control
+    delta : float or dict
+        PAC risk tolerance for each class
+        If float: same for both classes
+        If dict: {0: δ0, 1: δ1} for per-class control
+    mode : str, default="beta"
+        "beta" (infinite test) or "beta-binomial" (finite test)
+    m : int, optional
+        Test window size for beta-binomial mode
+    Returns
+    -------
+    calibration_result : dict
+        Dictionary with keys 0 and 1, each containing calibration info.
+        A class with no samples gets ``threshold=None`` and an ``'error'`` entry.
+    prediction_stats : dict
+        Dictionary with keys:
+        - 0, 1: per-class statistics (conditioned on true label)
+        - 'marginal': overall statistics (ignoring true labels)
+        If either class has no threshold, only ``{'error': ...}`` is returned.
+    Raises
+    ------
+    TypeError
+        If ``alpha_target`` or ``delta`` is neither a number nor a dict
+    KeyError
+        If a dict-valued ``alpha_target`` or ``delta`` lacks key 0 or 1
+    Examples
+    --------
+    >>> labels = np.array([0, 1, 0, 1])
+    >>> probs = np.array([[0.8, 0.2], [0.3, 0.7], [0.9, 0.1], [0.2, 0.8]])
+    >>> class_data = split_by_class(labels, probs)
+    >>> cal_result, pred_stats = mondrian_conformal_calibrate(
+    ...     class_data, alpha_target=0.1, delta=0.1
+    ... )
+    """
+    # Handle scalar or dict inputs for alpha and delta
+    alpha_dict = _per_class_setting(alpha_target, "alpha_target")
+    delta_dict = _per_class_setting(delta, "delta")
+    calibration_result: dict[int, dict[str, Any]] = {}
+    # Step 1: Calibrate per class
+    for label in (0, 1):
+        data = class_data[label]
+        n = data["n"]
+        alpha_class = alpha_dict[label]
+        delta_class = delta_dict[label]
+        if n == 0:
+            calibration_result[label] = {
+                "n": 0,
+                "alpha_target": alpha_class,
+                "alpha_corrected": None,
+                "delta": delta_class,
+                "threshold": None,
+                "scores": np.array([]),
+                "ssbc_result": None,
+                "error": "No calibration samples for this class",
+            }
+            continue
+        # Compute nonconformity scores: s(x, y) = 1 - P(y|x)
+        scores = 1.0 - data["probs"][:, label]
+        # Apply SSBC to get corrected alpha
+        ssbc_result = ssbc_correct(alpha_target=alpha_class, n=n, delta=delta_class, mode=mode, m=m)
+        alpha_corrected = ssbc_result.alpha_corrected
+        # Conformal quantile rank k = ceil((n + 1) * (1 - alpha_corrected)).
+        # alpha_corrected is u_star / (n + 1), so the product is exactly the integer
+        # n + 1 - u_star. Using the integer avoids float round-off pushing ceil() one
+        # rank too high.
+        k = min(int(n) + 1 - ssbc_result.u_star, int(n))
+        sorted_scores = np.sort(scores)
+        # max(k, 1) keeps the index valid should k ever come out non-positive.
+        threshold = sorted_scores[max(k, 1) - 1]
+        calibration_result[label] = {
+            "n": n,
+            "alpha_target": alpha_class,
+            "alpha_corrected": alpha_corrected,
+            "delta": delta_class,
+            "threshold": threshold,
+            "scores": sorted_scores,
+            "ssbc_result": ssbc_result,
+            "k": k,
+        }
+    # Step 2: Evaluate prediction sets
+    if calibration_result[0].get("threshold") is None or calibration_result[1].get("threshold") is None:
+        return calibration_result, {
+            "error": "Cannot compute prediction sets - missing threshold for at least one class"
+        }
+    threshold_0 = calibration_result[0]["threshold"]
+    threshold_1 = calibration_result[1]["threshold"]
+    prediction_stats: dict[Any, Any] = {}
+    # Step 2a: Evaluate per true class
+    for true_label in (0, 1):
+        data = class_data[true_label]
+        n_class = data["n"]
+        if n_class == 0:
+            prediction_stats[true_label] = {"n_class": 0, "error": "No samples in this class"}
+            continue
+        prediction_sets = _build_prediction_sets(data["probs"][:n_class], threshold_0, threshold_1)
+        # Count set sizes and correctness
+        n_abstentions = sum(len(ps) == 0 for ps in prediction_sets)
+        n_doublets = sum(len(ps) == 2 for ps in prediction_sets)
+        n_singletons_correct = sum(ps == [true_label] for ps in prediction_sets)
+        n_singletons_incorrect = sum(len(ps) == 1 and true_label not in ps for ps in prediction_sets)
+        n_singletons_class = n_singletons_correct + n_singletons_incorrect
+        # PAC bounds (only defined when both singletons and escalations occur)
+        n_escalations = n_doublets + n_abstentions
+        if n_escalations > 0 and n_singletons_class > 0:
+            rho = n_singletons_class / n_escalations
+            kappa = n_abstentions / n_escalations
+            alpha_singlet_bound = alpha_dict[true_label] * (1 + 1 / rho) - kappa / rho
+            alpha_singlet_observed = n_singletons_incorrect / n_singletons_class
+        else:
+            rho = None
+            kappa = None
+            alpha_singlet_bound = None
+            alpha_singlet_observed = None
+        prediction_stats[true_label] = {
+            "n_class": n_class,
+            "alpha_target": alpha_dict[true_label],
+            "delta": delta_dict[true_label],
+            "abstentions": cp_interval(n_abstentions, n_class),
+            "singletons": cp_interval(n_singletons_class, n_class),
+            "singletons_correct": cp_interval(n_singletons_correct, n_class),
+            "singletons_incorrect": cp_interval(n_singletons_incorrect, n_class),
+            "doublets": cp_interval(n_doublets, n_class),
+            "prediction_sets": prediction_sets,
+            "pac_bounds": {
+                "rho": rho,
+                "kappa": kappa,
+                "alpha_singlet_bound": alpha_singlet_bound,
+                "alpha_singlet_observed": alpha_singlet_observed,
+                "n_singletons": n_singletons_class,
+                "n_escalations": n_escalations,
+            },
+        }
+    # Step 2b: MARGINAL ANALYSIS (ignoring true labels)
+    # Reconstruct full dataset
+    all_labels = np.concatenate([class_data[0]["labels"], class_data[1]["labels"]])
+    all_probs = np.concatenate([class_data[0]["probs"], class_data[1]["probs"]], axis=0)
+    all_indices = np.concatenate([class_data[0]["indices"], class_data[1]["indices"]])
+    # Sort back to original order
+    sort_idx = np.argsort(all_indices)
+    all_labels = all_labels[sort_idx]
+    all_probs = all_probs[sort_idx]
+    n_total = len(all_labels)
+    # Compute prediction sets for all samples
+    all_prediction_sets = _build_prediction_sets(all_probs[:n_total], threshold_0, threshold_1)
+    # Count overall set sizes
+    n_abstentions_total = sum(len(ps) == 0 for ps in all_prediction_sets)
+    n_singletons_total = sum(len(ps) == 1 for ps in all_prediction_sets)
+    n_doublets_total = sum(len(ps) == 2 for ps in all_prediction_sets)
+    # Break down singletons by predicted class
+    n_singletons_pred_0 = sum(ps == [0] for ps in all_prediction_sets)
+    n_singletons_pred_1 = sum(ps == [1] for ps in all_prediction_sets)
+    # Compute overall coverage
+    n_covered = sum(label in ps for label, ps in zip(all_labels, all_prediction_sets))
+    coverage = n_covered / n_total
+    # Compute errors on singletons
+    n_singletons_covered = sum(label in ps for label, ps in zip(all_labels, all_prediction_sets) if len(ps) == 1)
+    n_singletons_errors = n_singletons_total - n_singletons_covered
+    # Overall PAC bounds (using weighted average of alphas for interpretation)
+    n_escalations_total = n_doublets_total + n_abstentions_total
+    if n_escalations_total > 0 and n_singletons_total > 0:
+        rho_marginal = n_singletons_total / n_escalations_total
+        kappa_marginal = n_abstentions_total / n_escalations_total
+        # Weighted average alpha (by class size)
+        n_0 = class_data[0]["n"]
+        n_1 = class_data[1]["n"]
+        alpha_weighted = (n_0 * alpha_dict[0] + n_1 * alpha_dict[1]) / (n_0 + n_1)
+        alpha_singlet_bound_marginal = alpha_weighted * (1 + 1 / rho_marginal) - kappa_marginal / rho_marginal
+        alpha_singlet_observed_marginal = n_singletons_errors / n_singletons_total
+    else:
+        rho_marginal = None
+        kappa_marginal = None
+        alpha_weighted = None
+        alpha_singlet_bound_marginal = None
+        alpha_singlet_observed_marginal = None
+    prediction_stats["marginal"] = {
+        "n_total": n_total,
+        "coverage": {"count": n_covered, "rate": coverage, "ci_95": cp_interval(n_covered, n_total)},
+        "abstentions": cp_interval(n_abstentions_total, n_total),
+        "singletons": {
+            **cp_interval(n_singletons_total, n_total),
+            "pred_0": n_singletons_pred_0,
+            "pred_1": n_singletons_pred_1,
+            "errors": n_singletons_errors,
+        },
+        "doublets": cp_interval(n_doublets_total, n_total),
+        "prediction_sets": all_prediction_sets,
+        "pac_bounds": {
+            "rho": rho_marginal,
+            "kappa": kappa_marginal,
+            "alpha_weighted": alpha_weighted,
+            "alpha_singlet_bound": alpha_singlet_bound_marginal,
+            "alpha_singlet_observed": alpha_singlet_observed_marginal,
+            "n_singletons": n_singletons_total,
+            "n_escalations": n_escalations_total,
+        },
+    }
+    return calibration_result, prediction_stats

ssbc/core.py ADDED Viewed

@@ -0,0 +1,205 @@
+"""Core SSBC (Small-Sample Beta Correction) algorithm."""
+import math
+from dataclasses import dataclass
+from typing import Literal
+from scipy.stats import beta as beta_dist
+from scipy.stats import betabinom, norm
+@dataclass
+class SSBCResult:
+    """Result of SSBC correction.
+    Attributes
+    ----------
+    alpha_target : float
+        Target miscoverage rate
+    alpha_corrected : float
+        Corrected miscoverage rate (u_star / (n+1))
+    u_star : int
+        Optimal u value found by the algorithm
+    n : int
+        Calibration set size
+    satisfied_mass : float
+        Probability that coverage >= target
+    mode : {"beta", "beta-binomial"}
+        "beta" for infinite test window, "beta-binomial" for finite
+    details : dict
+        Additional diagnostic information
+    """
+    alpha_target: float
+    alpha_corrected: float
+    u_star: int
+    n: int
+    satisfied_mass: float
+    mode: Literal["beta", "beta-binomial"]
+    details: dict
+def ssbc_correct(
+    alpha_target: float,
+    n: int,
+    delta: float,
+    *,
+    mode: Literal["beta", "beta-binomial"] = "beta",
+    m: int | None = None,
+    bracket_width: int | None = None,
+) -> SSBCResult:
+    """Small-Sample Beta Correction (SSBC), corrected acceptance rule.
+    Find the largest α' = u/(n+1) ≤ α_target such that:
+    P(Coverage(α') ≥ 1 - α_target) ≥ 1 - δ
+    where Coverage(α') ~ Beta(n+1-u, u) for infinite test window.
+    Parameters
+    ----------
+    alpha_target : float
+        Target miscoverage rate (must be in (0,1))
+    n : int
+        Calibration set size (must be >= 1)
+    delta : float
+        Risk tolerance / PAC parameter (must be in (0,1))
+    mode : {"beta", "beta-binomial"}, default="beta"
+        "beta" for infinite test window
+        "beta-binomial" for finite test window
+    m : int, optional
+        Test window size for beta-binomial mode (defaults to ``n`` when omitted)
+    bracket_width : int, optional
+        Radius of the initial search window around the initial guess
+        (default: adaptive based on n)
+    Returns
+    -------
+    SSBCResult
+        Dataclass containing correction results and diagnostic details
+    Raises
+    ------
+    ValueError
+        If parameters are out of valid ranges
+    Examples
+    --------
+    >>> result = ssbc_correct(alpha_target=0.10, n=50, delta=0.10)
+    >>> print(f"Corrected alpha: {result.alpha_corrected:.4f}")
+    Corrected alpha: 0.0392
+    Notes
+    -----
+    The search starts in a bracket around an initial guess obtained from a
+    normal approximation to the Beta distribution. The bracket is only a
+    starting window: if every u in it passes, the scan continues upwards
+    (never past u_max) until the first failure, and if none passes it
+    continues downwards until the first success. The result therefore does
+    not depend on ``bracket_width``; the extension assumes the tail mass is
+    non-increasing in u.
+    If no u in [1, u_max] satisfies the rule (including the case
+    floor(α_target * (n+1)) == 0, where that range is empty), u = 1 is
+    returned as the most conservative choice; ``satisfied_mass`` then
+    reports the mass actually achieved.
+    """
+    # Input validation
+    if not (0.0 < alpha_target < 1.0):
+        raise ValueError("alpha_target must be in (0,1).")
+    if n < 1:
+        raise ValueError("n must be >= 1.")
+    if not (0.0 < delta < 1.0):
+        raise ValueError("delta must be in (0,1).")
+    if mode not in ("beta", "beta-binomial"):
+        raise ValueError("mode must be 'beta' or 'beta-binomial'.")
+    target_coverage = 1 - alpha_target
+    pass_level = 1 - delta
+    if mode == "beta-binomial":
+        m_eval = m if m is not None else n
+        if m_eval < 1:
+            raise ValueError("m must be >= 1 for beta-binomial mode.")
+        # Smallest number of covered test points that still meets the target coverage
+        k_thresh = math.ceil(target_coverage * m_eval)
+    else:
+        m_eval = None
+        k_thresh = None
+    # Maximum u to search (α' must be ≤ α_target)
+    u_max = min(n, math.floor(alpha_target * (n + 1)))
+    # Initial guess for u using normal approximation to Beta distribution:
+    # u ≈ u_target - z_δ * sqrt(u_target), with u_target = (n+1)*α_target, z_δ = Φ^(-1)(1-δ)
+    u_target = (n + 1) * alpha_target
+    z_delta = norm.ppf(1 - delta)  # quantile function (inverse CDF)
+    u_star_guess = max(1, math.floor(u_target - z_delta * math.sqrt(u_target)))
+    # Clamp to valid range
+    u_star_guess = min(u_max, u_star_guess)
+    # Bracket width (Δ in Algorithm 1)
+    if bracket_width is None:
+        # Adaptive bracket: at least 5, scales with √u_target, at most n // 10
+        bracket_width = max(5, min(int(2 * z_delta * math.sqrt(u_target)), n // 10))
+        bracket_width = min(bracket_width, 100)  # cap at 100 for efficiency
+    # Search bounds - ensure we don't go outside [1, u_max]
+    u_min = max(1, u_star_guess - bracket_width)
+    u_search_max = min(u_max, u_star_guess + bracket_width)
+    # If the bracket is empty (e.g., guess clamped below 1), fall back to full search
+    if u_min > u_search_max:
+        u_min = 1
+        u_search_max = u_max
+    search_log: list[dict] = []
+    def tail_mass(u: int) -> float:
+        """P(coverage meets the target) when calibrating at α' = u/(n+1)."""
+        a = n + 1 - u
+        if mode == "beta":
+            # sf(x) = P(X > x); same as 1 - cdf(x) but without the cancellation error
+            return float(beta_dist.sf(target_coverage, a, u))
+        # P(X ≥ k_thresh) where X ~ BetaBinomial(m, a, b)
+        return float(betabinom.sf(k_thresh - 1, m_eval, a, u))
+    def evaluate(u: int) -> tuple[float, bool]:
+        """Evaluate one candidate u, record it in the search log, return (mass, passes)."""
+        ptail = tail_mass(u)
+        passes = ptail >= pass_level
+        search_log.append(
+            {
+                "u": u,
+                "alpha_prime": u / (n + 1),
+                "a": n + 1 - u,
+                "b": u,
+                "ptail": ptail,
+                "threshold": pass_level,
+                "passes": passes,
+            }
+        )
+        return ptail, passes
+    u_star: int | None = None
+    mass_star: float | None = None
+    searched_lo, searched_hi = u_min, u_search_max
+    # Scan the bracket, keeping the largest u that satisfies the acceptance rule
+    for u in range(u_min, u_search_max + 1):
+        ptail, passes = evaluate(u)
+        if passes:
+            u_star, mass_star = u, ptail
+    if u_star is not None and u_star == u_search_max:
+        # The top of the bracket still passes, so the true maximum may lie above it
+        for u in range(u_search_max + 1, u_max + 1):
+            searched_hi = u
+            ptail, passes = evaluate(u)
+            if not passes:
+                break
+            u_star, mass_star = u, ptail
+    elif u_star is None:
+        # Nothing in the bracket passes, so look below it for the first u that does
+        for u in range(u_min - 1, 0, -1):
+            searched_lo = u
+            ptail, passes = evaluate(u)
+            if passes:
+                u_star, mass_star = u, ptail
+                break
+    # Keep the log ordered by u regardless of the direction the scan was extended in
+    search_log.sort(key=lambda entry: entry["u"])
+    # If nothing passes, fall back to u=1 (most conservative)
+    if u_star is None:
+        u_star = 1
+        mass_star = tail_mass(u_star)
+    alpha_corrected = u_star / (n + 1)
+    return SSBCResult(
+        alpha_target=alpha_target,
+        alpha_corrected=alpha_corrected,
+        u_star=u_star,
+        n=n,
+        satisfied_mass=mass_star,
+        mode=mode,
+        details=dict(
+            u_max=u_max,
+            u_star_guess=u_star_guess,
+            search_range=(searched_lo, searched_hi),
+            bracket_width=bracket_width,
+            delta=delta,
+            m=(m if (mode == "beta-binomial") else None),
+            acceptance_rule="P(Coverage >= target) >= 1-delta",
+            search_log=search_log,
+        ),
+    )