PyPI - ssbc - Versions diffs - 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

ssbc 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

ssbc/__init__.py +50 -2
ssbc/bootstrap.py +411 -0
ssbc/cli.py +0 -3
ssbc/conformal.py +700 -1
ssbc/cross_conformal.py +425 -0
ssbc/mcp_server.py +93 -0
ssbc/operational_bounds_simple.py +367 -0
ssbc/rigorous_report.py +601 -0
ssbc/statistics.py +70 -0
ssbc/utils.py +72 -2
ssbc/validation.py +409 -0
ssbc/visualization.py +323 -300
ssbc-1.1.0.dist-info/METADATA +337 -0
ssbc-1.1.0.dist-info/RECORD +22 -0
ssbc-1.1.0.dist-info/licenses/LICENSE +29 -0
ssbc/ssbc.py +0 -1
ssbc-0.1.0.dist-info/METADATA +0 -266
ssbc-0.1.0.dist-info/RECORD +0 -17
ssbc-0.1.0.dist-info/licenses/LICENSE +0 -21
{ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/WHEEL +0 -0
{ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/entry_points.txt +0 -0
{ssbc-0.1.0.dist-info → ssbc-1.1.0.dist-info}/top_level.txt +0 -0

ssbc/visualization.py CHANGED Viewed

@@ -5,19 +5,62 @@ from typing import Any
 from .statistics import cp_interval
+def compute_conditional_rate_bounds(
+    numerator_fold_results: list[dict],
+    denominator_fold_results: list[dict],
+    weights: list[float],
+) -> tuple[float, float, float]:
+    """Compute bounds on conditional rate from fold-level counts.
+    For conditional rate P(A | B), computes cross-validated bounds by:
+    1. Computing A_count / B_count in each fold
+    2. Using Clopper-Pearson on aggregated counts
+    Parameters
+    ----------
+    numerator_fold_results : list[dict]
+        Fold results for numerator event (e.g., correct_in_singleton)
+    denominator_fold_results : list[dict]
+        Fold results for denominator event (e.g., singleton)
+    weights : list[float]
+        Fold weights
+    Returns
+    -------
+    rate : float
+        Estimated conditional rate
+    lower : float
+        Lower CI bound
+    upper : float
+        Upper CI bound
+    """
+    # Aggregate counts across folds
+    total_numerator = sum(fold["K_fr"] for fold in numerator_fold_results)
+    total_denominator = sum(fold["K_fr"] for fold in denominator_fold_results)
+    if total_denominator == 0:
+        return 0.0, 0.0, 1.0
+    # Compute CP interval on aggregated counts
+    ci = cp_interval(total_numerator, total_denominator)
+    return ci["proportion"], ci["lower"], ci["upper"]
 def report_prediction_stats(
-    prediction_stats: dict[Any, Any], calibration_result: dict[Any, Any], verbose: bool = True
+    prediction_stats: dict[Any, Any],
+    calibration_result: dict[Any, Any],
+    operational_bounds_per_class: dict[int, Any] | None = None,
+    marginal_operational_bounds: Any | None = None,
+    verbose: bool = True,
 ) -> dict[str | int, Any]:
-    """Pretty/robust summary for Mondrian conformal prediction stats.
+    """Report rigorous statistics for Mondrian conformal prediction with valid CIs.
-    Tolerates multiple schema shapes:
-      - dicts with 'rate'/'ci_95' or 'proportion'/'lower'/'upper'
-      - raw ints for counts (e.g., marginal['singletons']['pred_0'] = 339)
-      - per-class singleton correct/incorrect either nested under 'singletons'
-        OR as top-level aliases 'singletons_correct' / 'singletons_incorrect'.
+    Only displays statistics with valid confidence intervals:
+    - Per-class statistics from calibration data (valid within class)
+    - Per-class operational bounds from cross-validation (rigorous PAC bounds)
+    - Marginal operational bounds from cross-validated Mondrian (rigorous PAC bounds)
-    Also computes Clopper-Pearson CIs when missing, and splits marginal
-    singleton errors by predicted class.
+    Does NOT display marginal statistics from calibration data (invalid CIs for Mondrian).
     Parameters
     ----------
@@ -25,338 +68,318 @@ def report_prediction_stats(
         Output from mondrian_conformal_calibrate (second return value)
     calibration_result : dict
         Output from mondrian_conformal_calibrate (first return value)
+    operational_bounds_per_class : dict[int, OperationalRateBoundsResult], optional
+        Per-class operational bounds from compute_mondrian_operational_bounds
+    marginal_operational_bounds : OperationalRateBoundsResult, optional
+        Marginal operational bounds from compute_marginal_operational_bounds
     verbose : bool, default=True
         If True, print detailed statistics to stdout
     Returns
     -------
     dict
-        Structured summary with CIs for all metrics, containing:
+        Structured summary with valid CIs:
         - Keys 0, 1 for per-class statistics
-        - Key 'marginal' for overall deployment statistics
+        - Key 'marginal_bounds' if marginal_operational_bounds provided
     Examples
     --------
-    >>> # After calibration
+    >>> # Basic: Only calibration data (limited info)
     >>> cal_result, pred_stats = mondrian_conformal_calibrate(...)
-    >>> summary = report_prediction_stats(pred_stats, cal_result, verbose=True)
-    >>> print(summary['marginal']['coverage']['rate'])
+    >>> summary = report_prediction_stats(pred_stats, cal_result)
+    >>>
+    >>> # With per-class operational bounds (rigorous)
+    >>> op_bounds = compute_mondrian_operational_bounds(cal_result, class_data, delta=0.05)
+    >>> summary = report_prediction_stats(pred_stats, cal_result, op_bounds)
+    >>>
+    >>> # With marginal bounds too (complete SLA)
+    >>> marginal = compute_marginal_operational_bounds(labels, probs, 0.1, 0.05, 0.05)
+    >>> summary = report_prediction_stats(pred_stats, cal_result, op_bounds, marginal)
     """
-    # Helper functions
-    def as_dict(x: Any) -> dict[str, Any]:
-        """Ensure x is a dict."""
-        return x if isinstance(x, dict) else {}
-    def get_count(x: Any, default: int = 0) -> int:
-        """Extract count from dict or int."""
-        if isinstance(x, dict):
-            return int(x.get("count", default))
-        if isinstance(x, int):
-            return int(x)
-        return default
-    def get_rate(x: Any, default: float | None = 0.0) -> float | None:
-        """Extract rate from dict or float."""
-        if isinstance(x, dict):
-            if "rate" in x:
-                return float(x["rate"])
-            if "proportion" in x:
-                return float(x["proportion"])
-            return default
-        if isinstance(x, float):
-            return float(x)
-        return default
-    def get_ci_tuple(x: Any) -> tuple[float, float]:
-        """Extract CI bounds from dict."""
-        if not isinstance(x, dict):
-            return (0.0, 0.0)
-        if "ci_95" in x and isinstance(x["ci_95"], tuple | list) and len(x["ci_95"]) == 2:
-            return float(x["ci_95"][0]), float(x["ci_95"][1])
-        lo = x.get("lower", 0.0)
-        hi = x.get("upper", 0.0)
-        return float(lo), float(hi)
-    def ensure_ci(d: dict[str, Any], count: int, total: int) -> tuple[float, float, float]:
-        """Return (rate, lo, hi). If d already has rate/CI, use them; else compute CP from count/total."""
-        r = get_rate(d, default=None)
-        lo, hi = get_ci_tuple(d)
-        if r is None or (lo == 0.0 and hi == 0.0 and (count > 0 or total > 0)):
-            ci = cp_interval(count, total)
-            return ci["proportion"], ci["lower"], ci["upper"]
-        return float(r), float(lo), float(hi)
-    def pct(x: float) -> str:
-        """Format percentage."""
-        return f"{x:6.2%}"
+    from .statistics import cp_interval
     summary: dict[str | int, Any] = {}
     if verbose:
         print("=" * 80)
-        print("PREDICTION SET STATISTICS (All rates with 95% Clopper-Pearson CIs)")
+        print("MONDRIAN CONFORMAL PREDICTION REPORT")
         print("=" * 80)
-    # ----------------- per-class (conditioned on Y) -----------------
-    for class_label in [0, 1]:
-        if class_label not in prediction_stats:
-            continue
+    # ==================== PER-CLASS STATISTICS ====================
+    for class_label in sorted([k for k in prediction_stats.keys() if isinstance(k, int)]):
         cls = prediction_stats[class_label]
         if isinstance(cls, dict) and "error" in cls:
             if verbose:
-                print(f"\nClass {class_label}: {cls['error']}")
+                print(f"\nCLASS {class_label}: {cls['error']}")
             summary[class_label] = {"error": cls["error"]}
             continue
         n = int(cls.get("n", cls.get("n_class", 0)))
-        alpha_target = cls.get("alpha_target", calibration_result.get(class_label, {}).get("alpha_target", None))
-        delta = cls.get("delta", calibration_result.get(class_label, {}).get("delta", None))
-        abst = as_dict(cls.get("abstentions", {}))
-        sing = as_dict(cls.get("singletons", {}))
-        # Accept both nested and flat aliases
-        sing_corr = as_dict(sing.get("correct", cls.get("singletons_correct", {})))
-        sing_inc = as_dict(sing.get("incorrect", cls.get("singletons_incorrect", {})))
-        doub = as_dict(cls.get("doublets", {}))
-        pac = as_dict(cls.get("pac_bounds", {}))
-        # Counts
-        abst_count = get_count(abst)
-        sing_count = get_count(sing)
-        sing_corr_count = get_count(sing_corr)
-        sing_inc_count = get_count(sing_inc)
-        doub_count = get_count(doub)
-        # Ensure rates/CIs (fallback to CP if missing)
-        abst_rate, abst_lo, abst_hi = ensure_ci(abst, abst_count, n)
-        sing_rate, sing_lo, sing_hi = ensure_ci(sing, sing_count, n)
-        sing_corr_rate, sing_corr_lo, sing_corr_hi = ensure_ci(sing_corr, sing_corr_count, n)
-        sing_inc_rate, sing_inc_lo, sing_inc_hi = ensure_ci(sing_inc, sing_inc_count, n)
-        doub_rate, doub_lo, doub_hi = ensure_ci(doub, doub_count, n)
-        # P(error | singleton, Y=class)
-        err_given_single_ci = cp_interval(sing_inc_count, sing_count if sing_count > 0 else 1)
-        class_summary = {
-            "n": n,
-            "alpha_target": alpha_target,
-            "delta": delta,
-            "abstentions": {"count": abst_count, "rate": abst_rate, "ci_95": (abst_lo, abst_hi)},
-            "singletons": {
-                "count": sing_count,
-                "rate": sing_rate,
-                "ci_95": (sing_lo, sing_hi),
-                "correct": {"count": sing_corr_count, "rate": sing_corr_rate, "ci_95": (sing_corr_lo, sing_corr_hi)},
-                "incorrect": {"count": sing_inc_count, "rate": sing_inc_rate, "ci_95": (sing_inc_lo, sing_inc_hi)},
-                "error_given_singleton": {
-                    "count": sing_inc_count,
-                    "denom": sing_count,
-                    "rate": err_given_single_ci["proportion"],
-                    "ci_95": (err_given_single_ci["lower"], err_given_single_ci["upper"]),
-                },
-            },
-            "doublets": {"count": doub_count, "rate": doub_rate, "ci_95": (doub_lo, doub_hi)},
-            "pac_bounds": pac,
-        }
-        summary[class_label] = class_summary
+        if n == 0:
+            continue
+        # Get calibration info
+        cal = calibration_result.get(class_label, {})
+        alpha_target = cal.get("alpha_target")
+        alpha_corrected = cal.get("alpha_corrected")
+        delta = cal.get("delta")
+        threshold = cal.get("threshold")
         if verbose:
             print(f"\n{'=' * 80}")
             print(f"CLASS {class_label} (Conditioned on True Label = {class_label})")
             print(f"{'=' * 80}")
-            alpha_str = f"{alpha_target:.3f}" if alpha_target is not None else "n/a"
-            delta_str = f"{delta:.3f}" if delta is not None else "n/a"
-            print(f"  n={n}, α_target={alpha_str}, δ={delta_str}")
-            print("\nPrediction Set Breakdown:")
-            print(
-                f"  Abstentions:  {abst_count:4d} / {n:4d} = {pct(abst_rate)}  95% CI: [{abst_lo:.4f}, {abst_hi:.4f}]"
-            )
-            print(
-                f"  Singletons:   {sing_count:4d} / {n:4d} = {pct(sing_rate)}  95% CI: [{sing_lo:.4f}, {sing_hi:.4f}]"
-            )
-            print(
-                f"    ├─ Correct:   {sing_corr_count:4d} / {n:4d} = {pct(sing_corr_rate)}  "
-                f"95% CI: [{sing_corr_lo:.4f}, {sing_corr_hi:.4f}]"
-            )
-            print(
-                f"    └─ Incorrect: {sing_inc_count:4d} / {n:4d} = {pct(sing_inc_rate)}  "
-                f"95% CI: [{sing_inc_lo:.4f}, {sing_inc_hi:.4f}]"
-            )
-            print(
-                f"  Singleton error | Y={class_label}: "
-                f"{sing_inc_count:4d} / {sing_count:4d} = {pct(err_given_single_ci['proportion'])}  "
-                f"95% CI: [{err_given_single_ci['lower']:.4f}, {err_given_single_ci['upper']:.4f}]"
-            )
-            print(
-                f"\n  Doublets:     {doub_count:4d} / {n:4d} = {pct(doub_rate)}  95% CI: [{doub_lo:.4f}, {doub_hi:.4f}]"
-            )
-            if pac and pac.get("rho", None) is not None:
-                print(f"\n  PAC Singleton Error Rate (δ={delta_str}):")
-                print(f"    ρ = {pac.get('rho', 0):.3f}, κ = {pac.get('kappa', 0):.3f}")
-                if "alpha_singlet_bound" in pac and "alpha_singlet_observed" in pac:
-                    bound = float(pac["alpha_singlet_bound"])
-                    observed = float(pac["alpha_singlet_observed"])
+            print(f"  Calibration size: n = {n}")
+            if alpha_target is not None:
+                print(f"  Target miscoverage: α = {alpha_target:.3f}")
+            if alpha_corrected is not None:
+                print(f"  SSBC-corrected α:   α' = {alpha_corrected:.4f}")
+            if delta is not None:
+                print(f"  PAC risk:           δ = {delta:.3f}")
+            if threshold is not None:
+                print(f"  Conformal threshold: {threshold:.4f}")
+        # Per-class stats from calibration data (VALID - exchangeable within class)
+        if verbose:
+            print(f"\n  📊 Statistics from Calibration Data (n={n}):")
+            print("     [Basic CP CIs without PAC guarantee - evaluated on calibration data]")
+        # Abstentions
+        abstentions = cls.get("abstentions", {})
+        if isinstance(abstentions, dict):
+            abst_count = abstentions.get("count", 0)
+            abst_ci = cp_interval(abst_count, n)
+            if verbose:
+                print(
+                    f"    Abstentions:  {abst_count:4d} / {n:4d} = {abst_ci['proportion']:6.2%}  "
+                    f"95% CI: [{abst_ci['lower']:.3f}, {abst_ci['upper']:.3f}]"
+                )
+        # Singletons (note: singletons_correct/incorrect are at top level, not nested)
+        singletons = cls.get("singletons", {})
+        singletons_correct = cls.get("singletons_correct", {})
+        singletons_incorrect = cls.get("singletons_incorrect", {})
+        if isinstance(singletons, dict):
+            sing_count = singletons.get("count", 0)
+            sing_correct = singletons_correct.get("count", 0) if isinstance(singletons_correct, dict) else 0
+            sing_incorrect = singletons_incorrect.get("count", 0) if isinstance(singletons_incorrect, dict) else 0
+            # Compute valid CIs (exchangeable within class)
+            sing_ci = cp_interval(sing_count, n)
+            sing_corr_ci = cp_interval(sing_correct, n)
+            sing_inc_ci = cp_interval(sing_incorrect, n)
+            if verbose:
+                print(
+                    f"    Singletons:   {sing_count:4d} / {n:4d} = {sing_ci['proportion']:6.2%}  "
+                    f"95% CI: [{sing_ci['lower']:.3f}, {sing_ci['upper']:.3f}]"
+                )
+                print(
+                    f"      Correct:    {sing_correct:4d} / {n:4d} = {sing_corr_ci['proportion']:6.2%}  "
+                    f"95% CI: [{sing_corr_ci['lower']:.3f}, {sing_corr_ci['upper']:.3f}]"
+                )
+                print(
+                    f"      Incorrect:  {sing_incorrect:4d} / {n:4d} = {sing_inc_ci['proportion']:6.2%}  "
+                    f"95% CI: [{sing_inc_ci['lower']:.3f}, {sing_inc_ci['upper']:.3f}]"
+                )
+                # Error rate given singleton
+                if sing_count > 0:
+                    err_given_sing = cp_interval(sing_incorrect, sing_count)
+                    print(
+                        f"    Error | singleton: {sing_incorrect:4d} / {sing_count:4d} = "
+                        f"{err_given_sing['proportion']:6.2%}  "
+                        f"95% CI: [{err_given_sing['lower']:.3f}, {err_given_sing['upper']:.3f}]"
+                    )
+        # Doublets
+        doublets = cls.get("doublets", {})
+        if isinstance(doublets, dict):
+            doub_count = doublets.get("count", 0)
+            doub_ci = cp_interval(doub_count, n)
+            if verbose:
+                print(
+                    f"    Doublets:     {doub_count:4d} / {n:4d} = {doub_ci['proportion']:6.2%}  "
+                    f"95% CI: [{doub_ci['lower']:.3f}, {doub_ci['upper']:.3f}]"
+                )
+        # PAC bounds (ρ, κ, α'_bound) - important theoretical guarantees
+        pac_bounds = cls.get("pac_bounds", {})
+        if isinstance(pac_bounds, dict) and pac_bounds.get("rho") is not None:
+            if verbose:
+                print(f"\n  📐 PAC Singleton Error Bound (δ={delta:.3f}):")
+                print(f"     ρ = {pac_bounds.get('rho', 0):.3f}, κ = {pac_bounds.get('kappa', 0):.3f}")
+                if "alpha_singlet_bound" in pac_bounds and "alpha_singlet_observed" in pac_bounds:
+                    bound = float(pac_bounds["alpha_singlet_bound"])
+                    observed = float(pac_bounds["alpha_singlet_observed"])
                     ok = "✓" if observed <= bound else "✗"
-                    print(f"    α'_bound:    {bound:.4f}")
-                    print(f"    α'_observed: {observed:.4f} {ok}")
-    # ----------------- marginal / deployment view -----------------
-    if "marginal" in prediction_stats:
-        marg = prediction_stats["marginal"]
-        n_total = int(marg["n_total"])
-        cov = as_dict(marg.get("coverage", {}))
-        abst_m = as_dict(marg.get("abstentions", {}))
-        sing_m = as_dict(marg.get("singletons", {}))
-        doub_m = as_dict(marg.get("doublets", {}))
-        pac_m = as_dict(marg.get("pac_bounds", {}))
-        cov_count = get_count(cov)
-        abst_m_count = get_count(abst_m)
-        sing_total = get_count(sing_m)
-        doub_m_count = get_count(doub_m)
-        cov_rate, cov_lo, cov_hi = ensure_ci(cov, cov_count, n_total)
-        abst_m_rate, abst_m_lo, abst_m_hi = ensure_ci(abst_m, abst_m_count, n_total)
-        sing_m_rate, sing_m_lo, sing_m_hi = ensure_ci(sing_m, sing_total, n_total)
-        doub_m_rate, doub_m_lo, doub_m_hi = ensure_ci(doub_m, doub_m_count, n_total)
-        # pred_0 / pred_1 may be dicts or ints (counts)
-        raw_s0 = sing_m.get("pred_0", 0)
-        raw_s1 = sing_m.get("pred_1", 0)
-        s0_count = get_count(raw_s0)
-        s1_count = get_count(raw_s1)
-        # Prefer provided rate/CI, else compute off n_total
-        if isinstance(raw_s0, dict):
-            s0_rate, s0_lo, s0_hi = ensure_ci(raw_s0, s0_count, n_total)
-        else:
-            s0_ci = cp_interval(s0_count, n_total)
-            s0_rate, s0_lo, s0_hi = s0_ci["proportion"], s0_ci["lower"], s0_ci["upper"]
+                    print(f"     α'_bound:    {bound:.4f}")
+                    print(f"     α'_observed: {observed:.4f} {ok}")
-        if isinstance(raw_s1, dict):
-            s1_rate, s1_lo, s1_hi = ensure_ci(raw_s1, s1_count, n_total)
-        else:
-            s1_ci = cp_interval(s1_count, n_total)
-            s1_rate, s1_lo, s1_hi = s1_ci["proportion"], s1_ci["lower"], s1_ci["upper"]
-        # Overall singleton errors (dict or int). Denominator should be sing_total.
-        raw_s_err = sing_m.get("errors", 0)
-        s_err_count = get_count(raw_s_err)
-        if isinstance(raw_s_err, dict):
-            s_err_rate, s_err_lo, s_err_hi = ensure_ci(raw_s_err, s_err_count, sing_total if sing_total > 0 else 1)
-        else:
-            se_ci = cp_interval(s_err_count, sing_total if sing_total > 0 else 1)
-            s_err_rate, s_err_lo, s_err_hi = se_ci["proportion"], se_ci["lower"], se_ci["upper"]
-        # Errors by predicted class via per-class incorrect singletons
-        # (pred 0 errors happen when Y=1 singleton is wrong; pred 1 errors when Y=0 singleton is wrong)
-        err_pred0_count = int(
-            prediction_stats.get(1, {})
-            .get("singletons", {})
-            .get("incorrect", prediction_stats.get(1, {}).get("singletons_incorrect", {}))
-            .get("count", 0)
-        )
-        err_pred1_count = int(
-            prediction_stats.get(0, {})
-            .get("singletons", {})
-            .get("incorrect", prediction_stats.get(0, {}).get("singletons_incorrect", {}))
-            .get("count", 0)
-        )
-        pred0_err_ci = cp_interval(err_pred0_count, s0_count if s0_count > 0 else 1)
-        pred1_err_ci = cp_interval(err_pred1_count, s1_count if s1_count > 0 else 1)
-        marginal_summary = {
-            "n_total": n_total,
-            "coverage": {"count": cov_count, "rate": cov_rate, "ci_95": (cov_lo, cov_hi)},
-            "abstentions": {"count": abst_m_count, "rate": abst_m_rate, "ci_95": (abst_m_lo, abst_m_hi)},
-            "singletons": {
-                "count": sing_total,
-                "rate": sing_m_rate,
-                "ci_95": (sing_m_lo, sing_m_hi),
-                "pred_0": {"count": s0_count, "rate": s0_rate, "ci_95": (s0_lo, s0_hi)},
-                "pred_1": {"count": s1_count, "rate": s1_rate, "ci_95": (s1_lo, s1_hi)},
-                "errors": {"count": s_err_count, "rate": s_err_rate, "ci_95": (s_err_lo, s_err_hi)},
-                "errors_by_pred": {
-                    "pred_0": {
-                        "count": err_pred0_count,
-                        "denom": s0_count,
-                        "rate": pred0_err_ci["proportion"],
-                        "ci_95": (pred0_err_ci["lower"], pred0_err_ci["upper"]),
-                    },
-                    "pred_1": {
-                        "count": err_pred1_count,
-                        "denom": s1_count,
-                        "rate": pred1_err_ci["proportion"],
-                        "ci_95": (pred1_err_ci["lower"], pred1_err_ci["upper"]),
-                    },
-                },
+        # Operational bounds (RIGOROUS - cross-validated with PAC guarantees)
+        if operational_bounds_per_class and class_label in operational_bounds_per_class:
+            op_bounds = operational_bounds_per_class[class_label]
+            if verbose:
+                print("\n  ✅ RIGOROUS Operational Bounds (LOO-CV)")
+                print(f"     CI width: {op_bounds.ci_width:.1%}")
+                print(f"     Calibration size: n = {op_bounds.n_calibration}")
+            # Show main rates (singleton, doublet, abstention)
+            for rate_name in ["abstention", "singleton", "doublet"]:
+                if rate_name in op_bounds.rate_bounds:
+                    bounds = op_bounds.rate_bounds[rate_name]
+                    if verbose:
+                        print(f"\n     {rate_name.upper()}:")
+                        print(f"       Bounds: [{bounds.lower_bound:.3f}, {bounds.upper_bound:.3f}]")
+                        print(f"       Count: {bounds.n_successes}/{bounds.n_evaluations}")
+            # Show conditional singleton rates (conditional on having a singleton)
+            has_correct = "correct_in_singleton" in op_bounds.rate_bounds
+            has_error = "error_in_singleton" in op_bounds.rate_bounds
+            has_singleton = "singleton" in op_bounds.rate_bounds
+            if verbose and (has_correct or has_error) and has_singleton:
+                print("\n     CONDITIONAL RATES (conditioned on singleton, with CP+PAC bounds):")
+                singleton_bounds = op_bounds.rate_bounds["singleton"]
+                n_singletons = singleton_bounds.n_successes
+                # P(correct | singleton) with rigorous CP bounds
+                if has_correct and n_singletons > 0:
+                    correct_bounds = op_bounds.rate_bounds["correct_in_singleton"]
+                    n_correct = correct_bounds.n_successes
+                    # Conditional rate and CP interval
+                    rate = n_correct / n_singletons if n_singletons > 0 else 0.0
+                    ci = cp_interval(n_correct, n_singletons)
+                    print(f"       P(correct | singleton) = {rate:.3f}  95% CI: [{ci['lower']:.3f}, {ci['upper']:.3f}]")
+                # P(error | singleton) with rigorous CP bounds
+                if has_error and n_singletons > 0:
+                    error_bounds = op_bounds.rate_bounds["error_in_singleton"]
+                    n_error = error_bounds.n_successes
+                    # Conditional rate and CP interval
+                    rate = n_error / n_singletons if n_singletons > 0 else 0.0
+                    ci = cp_interval(n_error, n_singletons)
+                    print(f"       P(error | singleton)   = {rate:.3f}  95% CI: [{ci['lower']:.3f}, {ci['upper']:.3f}]")
+        # Store in summary
+        summary[class_label] = {
+            "n": n,
+            "alpha_target": alpha_target,
+            "alpha_corrected": alpha_corrected,
+            "threshold": threshold,
+            "calibration_stats": {
+                "abstentions": abstentions,
+                "singletons": singletons,
+                "doublets": doublets,
             },
-            "doublets": {"count": doub_m_count, "rate": doub_m_rate, "ci_95": (doub_m_lo, doub_m_hi)},
-            "pac_bounds": pac_m,
+            "pac_bounds": pac_bounds,
         }
-        summary["marginal"] = marginal_summary
+        if operational_bounds_per_class and class_label in operational_bounds_per_class:
+            summary[class_label]["operational_bounds"] = operational_bounds_per_class[class_label]
+    # ==================== MARGINAL STATISTICS ====================
+    if marginal_operational_bounds is not None:
         if verbose:
             print(f"\n{'=' * 80}")
-            print("MARGINAL ANALYSIS (Deployment View - Ignores True Labels)")
+            print("MARGINAL STATISTICS (Deployment View - Ignores True Labels)")
             print(f"{'=' * 80}")
-            print(f"  Total samples: {n_total}")
-            print("\nOverall Coverage:")
-            print(f"  Covered: {cov_count:4d} / {n_total:4d} = {pct(cov_rate)}  95% CI: [{cov_lo:.4f}, {cov_hi:.4f}]")
-            print("\nPrediction Set Distribution:")
-            print(
-                f"  Abstentions: {abst_m_count:4d} / {n_total:4d} = {pct(abst_m_rate)}  "
-                f"95% CI: [{abst_m_lo:.4f}, {abst_m_hi:.4f}]"
-            )
-            print(
-                f"  Singletons:  {sing_total:4d} / {n_total:4d} = {pct(sing_m_rate)}  "
-                f"95% CI: [{sing_m_lo:.4f}, {sing_m_hi:.4f}]"
-            )
-            print(f"    ├─ Pred 0: {s0_count:4d} / {n_total:4d} = {pct(s0_rate)}  95% CI: [{s0_lo:.4f}, {s0_hi:.4f}]")
-            print(f"    ├─ Pred 1: {s1_count:4d} / {n_total:4d} = {pct(s1_rate)}  95% CI: [{s1_lo:.4f}, {s1_hi:.4f}]")
-            print(
-                f"    ├─ Errors (overall): {s_err_count:4d} / {sing_total:4d} = {pct(s_err_rate)}  "
-                f"95% CI: [{s_err_lo:.4f}, {s_err_hi:.4f}]"
-            )
-            print(
-                f"    ├─ Pred 0 errors:    {err_pred0_count:4d} / {s0_count:4d} = {pct(pred0_err_ci['proportion'])}  "
-                f"95% CI: [{pred0_err_ci['lower']:.4f}, {pred0_err_ci['upper']:.4f}]"
-            )
-            print(
-                f"    └─ Pred 1 errors:    {err_pred1_count:4d} / {s1_count:4d} = {pct(pred1_err_ci['proportion'])}  "
-                f"95% CI: [{pred1_err_ci['lower']:.4f}, {pred1_err_ci['upper']:.4f}]"
-            )
-            print(
-                f"  Doublets:    {doub_m_count:4d} / {n_total:4d} = {pct(doub_m_rate)}  "
-                f"95% CI: [{doub_m_lo:.4f}, {doub_m_hi:.4f}]"
-            )
-            if pac_m and pac_m.get("rho", None) is not None:
-                aw = pac_m.get("alpha_weighted", None)
-                aw_str = f"{float(aw):.3f}" if aw is not None else "n/a"
-                print(f"\n  Overall PAC Bounds (weighted α={aw_str}):")
-                print(f"    ρ = {pac_m.get('rho', 0):.3f}, κ = {pac_m.get('kappa', 0):.3f}")
-                if "alpha_singlet_bound" in pac_m and "alpha_singlet_observed" in pac_m:
-                    bound = float(pac_m["alpha_singlet_bound"])
-                    observed = float(pac_m["alpha_singlet_observed"])
-                    ok = "✓" if observed <= bound else "✗"
-                    print(f"    α'_bound:    {bound:.4f}")
-                    print(f"    α'_observed: {observed:.4f} {ok}")
+            print(f"  Total samples: n = {marginal_operational_bounds.n_calibration}")
+            print("\n  ✅ RIGOROUS Marginal Bounds (LOO-CV)")
+            print(f"     CI width: {marginal_operational_bounds.ci_width:.1%}")
+            print(f"     Total evaluations: n = {marginal_operational_bounds.n_calibration}")
+        # Show main rates
+        for rate_name in ["abstention", "singleton", "doublet"]:
+            if rate_name in marginal_operational_bounds.rate_bounds:
+                bounds = marginal_operational_bounds.rate_bounds[rate_name]
+                if verbose:
+                    print(f"\n     {rate_name.upper()}:")
+                    print(f"       Bounds: [{bounds.lower_bound:.3f}, {bounds.upper_bound:.3f}]")
+                    print(f"       Count: {bounds.n_successes}/{bounds.n_evaluations}")
+        # Show conditional singleton rates (marginal)
+        has_correct = "correct_in_singleton" in marginal_operational_bounds.rate_bounds
+        has_error = "error_in_singleton" in marginal_operational_bounds.rate_bounds
+        has_singleton = "singleton" in marginal_operational_bounds.rate_bounds
+        if verbose and (has_correct or has_error) and has_singleton:
+            print("\n     CONDITIONAL RATES (conditioned on singleton, with CP+PAC bounds):")
+            singleton_bounds = marginal_operational_bounds.rate_bounds["singleton"]
+            n_singletons = singleton_bounds.n_successes
+            if has_correct and n_singletons > 0:
+                correct_bounds = marginal_operational_bounds.rate_bounds["correct_in_singleton"]
+                n_correct = correct_bounds.n_successes
+                # Conditional rate and CP interval
+                rate = n_correct / n_singletons if n_singletons > 0 else 0.0
+                ci = cp_interval(n_correct, n_singletons)
+                print(f"       P(correct | singleton) = {rate:.3f}  95% CI: [{ci['lower']:.3f}, {ci['upper']:.3f}]")
+            if has_error and n_singletons > 0:
+                error_bounds = marginal_operational_bounds.rate_bounds["error_in_singleton"]
+                n_error = error_bounds.n_successes
+                # Conditional rate and CP interval
+                rate = n_error / n_singletons if n_singletons > 0 else 0.0
+                ci = cp_interval(n_error, n_singletons)
+                print(f"       P(error | singleton)   = {rate:.3f}  95% CI: [{ci['lower']:.3f}, {ci['upper']:.3f}]")
+        summary["marginal_bounds"] = marginal_operational_bounds
+        if verbose:
+            # Deployment interpretation
+            sing_bounds = marginal_operational_bounds.rate_bounds.get("singleton")
+            doub_bounds = marginal_operational_bounds.rate_bounds.get("doublet")
+            abst_bounds = marginal_operational_bounds.rate_bounds.get("abstention")
+            if sing_bounds:
+                print("\n  📈 Deployment Expectations:")
+                print(
+                    f"     Automation (singletons): "
+                    f"{sing_bounds.lower_bound:.1%} - {sing_bounds.upper_bound:.1%} of cases"
+                )
+                # Escalation = doublets + abstentions
+                if doub_bounds and abst_bounds:
+                    esc_lower = doub_bounds.lower_bound + abst_bounds.lower_bound
+                    esc_upper = doub_bounds.upper_bound + abst_bounds.upper_bound
+                    print(f"     Escalation (doublets+abstentions): {esc_lower:.1%} - {esc_upper:.1%} of cases")
+                elif doub_bounds:
+                    print(
+                        f"     Escalation (doublets): "
+                        f"{doub_bounds.lower_bound:.1%} - {doub_bounds.upper_bound:.1%} of cases"
+                    )
+    # ==================== WARNINGS ====================
+    if verbose:
+        print(f"\n{'=' * 80}")
+        print("NOTES")
+        print(f"{'=' * 80}")
+        print("\n✓ Per-class CIs are valid (Clopper-Pearson, exchangeable within class)")
+        if operational_bounds_per_class or marginal_operational_bounds:
+            print("✓ Operational bounds have PAC guarantees via cross-validation")
+        else:
+            print("\n⚠️  For rigorous deployment bounds, run:")
+            print("   - compute_mondrian_operational_bounds() for per-class bounds")
+            print("   - compute_marginal_operational_bounds() for marginal bounds")
-                n_escalations = int(pac_m.get("n_escalations", doub_m_count + abst_m_count))
-                print("\n  Deployment Decision Mix:")
-                print(f"    Automate: {sing_total} singletons ({sing_m_rate:.1%})")
-                print(f"    Escalate: {n_escalations} doublets+abstentions ({n_escalations / n_total:.1%})")
+        if prediction_stats.get("marginal") and marginal_operational_bounds is None:
+            print("\n⚠️  Marginal stats from calibration data NOT shown (invalid CIs for Mondrian)")
+            print("   Use compute_marginal_operational_bounds() for valid marginal bounds")
     return summary

ssbc 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

ssbc 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl