PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/metrics/_classification.py ADDED Viewed

@@ -0,0 +1,591 @@
+"""Backend-agnostic binary classification evaluation utilities."""
+from __future__ import annotations
+from typing import Any, Dict, Tuple
+import numpy as np
+from statgpu.backends import _resolve_backend
+def _as_binary_labels_numpy(y, *, name: str) -> np.ndarray:
+    y_arr = np.asarray(y).reshape(-1)
+    unique = np.unique(y_arr)
+    if not np.all(np.isin(unique, [0, 1])):
+        raise ValueError(f"{name} must contain only binary labels encoded as 0/1")
+    return y_arr.astype(np.int64)
+def _as_binary_labels_cupy(y, *, name: str):
+    import cupy as cp
+    y_arr = cp.asarray(y).reshape(-1)
+    unique = cp.unique(y_arr)
+    is_binary = cp.all((unique == 0) | (unique == 1))
+    if not bool(is_binary.item()):
+        raise ValueError(f"{name} must contain only binary labels encoded as 0/1")
+    return y_arr.astype(cp.int64)
+def _as_binary_labels_torch(y, *, name: str):
+    import torch
+    y_arr = torch.as_tensor(y).reshape(-1)
+    unique = torch.unique(y_arr)
+    is_binary = torch.all((unique == 0) | (unique == 1))
+    if not bool(is_binary.item()):
+        raise ValueError(f"{name} must contain only binary labels encoded as 0/1")
+    return y_arr.to(dtype=torch.int64)
+def _binary_confusion_numpy(y_true, y_pred):
+    y_true_arr = _as_binary_labels_numpy(y_true, name="y_true")
+    y_pred_arr = _as_binary_labels_numpy(y_pred, name="y_pred")
+    if y_true_arr.shape[0] != y_pred_arr.shape[0]:
+        raise ValueError("y_true and y_pred must have the same length")
+    tn = np.sum((y_true_arr == 0) & (y_pred_arr == 0))
+    fp = np.sum((y_true_arr == 0) & (y_pred_arr == 1))
+    fn = np.sum((y_true_arr == 1) & (y_pred_arr == 0))
+    tp = np.sum((y_true_arr == 1) & (y_pred_arr == 1))
+    return np.array([[tn, fp], [fn, tp]], dtype=np.int64)
+def _binary_confusion_cupy(y_true, y_pred):
+    import cupy as cp
+    y_true_arr = _as_binary_labels_cupy(y_true, name="y_true")
+    y_pred_arr = _as_binary_labels_cupy(y_pred, name="y_pred")
+    if y_true_arr.shape[0] != y_pred_arr.shape[0]:
+        raise ValueError("y_true and y_pred must have the same length")
+    tn = cp.sum((y_true_arr == 0) & (y_pred_arr == 0))
+    fp = cp.sum((y_true_arr == 0) & (y_pred_arr == 1))
+    fn = cp.sum((y_true_arr == 1) & (y_pred_arr == 0))
+    tp = cp.sum((y_true_arr == 1) & (y_pred_arr == 1))
+    return cp.array([[tn, fp], [fn, tp]], dtype=cp.int64)
+def _binary_confusion_torch(y_true, y_pred):
+    import torch
+    y_true_arr = _as_binary_labels_torch(y_true, name="y_true")
+    y_pred_arr = _as_binary_labels_torch(y_pred, name="y_pred")
+    if y_true_arr.shape[0] != y_pred_arr.shape[0]:
+        raise ValueError("y_true and y_pred must have the same length")
+    tn = torch.sum((y_true_arr == 0) & (y_pred_arr == 0))
+    fp = torch.sum((y_true_arr == 0) & (y_pred_arr == 1))
+    fn = torch.sum((y_true_arr == 1) & (y_pred_arr == 0))
+    tp = torch.sum((y_true_arr == 1) & (y_pred_arr == 1))
+    return torch.stack(
+        [torch.stack([tn, fp]), torch.stack([fn, tp])]
+    ).to(dtype=torch.int64)
+def _classification_table_numpy(y_true, y_pred):
+    cm = _binary_confusion_numpy(y_true, y_pred)
+    tn, fp = int(cm[0, 0]), int(cm[0, 1])
+    fn, tp = int(cm[1, 0]), int(cm[1, 1])
+    total = tn + fp + fn + tp
+    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
+    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
+    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
+    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0
+    accuracy = (tp + tn) / total if total > 0 else 0.0
+    return {
+        "tn": tn,
+        "fp": fp,
+        "fn": fn,
+        "tp": tp,
+        "accuracy": accuracy,
+        "precision": precision,
+        "recall": recall,
+        "specificity": specificity,
+        "f1": f1,
+        "support_negative": tn + fp,
+        "support_positive": fn + tp,
+    }
+def _classification_table_cupy(y_true, y_pred):
+    import cupy as cp
+    cm = _binary_confusion_cupy(y_true, y_pred)
+    tn, fp = cm[0, 0], cm[0, 1]
+    fn, tp = cm[1, 0], cm[1, 1]
+    total = tn + fp + fn + tp
+    zero = cp.asarray(0.0, dtype=cp.float64)
+    tp_f = tp.astype(cp.float64)
+    tn_f = tn.astype(cp.float64)
+    fp_f = fp.astype(cp.float64)
+    fn_f = fn.astype(cp.float64)
+    total_f = total.astype(cp.float64)
+    precision = cp.where((tp + fp) > 0, tp_f / (tp_f + fp_f), zero)
+    recall = cp.where((tp + fn) > 0, tp_f / (tp_f + fn_f), zero)
+    specificity = cp.where((tn + fp) > 0, tn_f / (tn_f + fp_f), zero)
+    f1 = cp.where((precision + recall) > 0, 2.0 * precision * recall / (precision + recall), zero)
+    accuracy = cp.where(total > 0, (tp_f + tn_f) / total_f, zero)
+    return {
+        "tn": tn,
+        "fp": fp,
+        "fn": fn,
+        "tp": tp,
+        "accuracy": accuracy,
+        "precision": precision,
+        "recall": recall,
+        "specificity": specificity,
+        "f1": f1,
+        "support_negative": tn + fp,
+        "support_positive": fn + tp,
+    }
+def _classification_table_torch(y_true, y_pred):
+    import torch
+    cm = _binary_confusion_torch(y_true, y_pred)
+    tn, fp = cm[0, 0], cm[0, 1]
+    fn, tp = cm[1, 0], cm[1, 1]
+    total = tn + fp + fn + tp
+    zero = torch.tensor(0.0, device=cm.device, dtype=torch.float64)
+    tp_f = tp.to(torch.float64)
+    tn_f = tn.to(torch.float64)
+    fp_f = fp.to(torch.float64)
+    fn_f = fn.to(torch.float64)
+    total_f = total.to(torch.float64)
+    precision = torch.where((tp + fp) > 0, tp_f / (tp_f + fp_f), zero)
+    recall = torch.where((tp + fn) > 0, tp_f / (tp_f + fn_f), zero)
+    specificity = torch.where((tn + fp) > 0, tn_f / (tn_f + fp_f), zero)
+    f1 = torch.where((precision + recall) > 0, 2.0 * precision * recall / (precision + recall), zero)
+    accuracy = torch.where(total > 0, (tp_f + tn_f) / total_f, zero)
+    return {
+        "tn": tn,
+        "fp": fp,
+        "fn": fn,
+        "tp": tp,
+        "accuracy": accuracy,
+        "precision": precision,
+        "recall": recall,
+        "specificity": specificity,
+        "f1": f1,
+        "support_negative": tn + fp,
+        "support_positive": fn + tp,
+    }
+def _roc_curve_numpy(y_true, y_score):
+    y_true_arr = _as_binary_labels_numpy(y_true, name="y_true")
+    y_score_arr = np.asarray(y_score, dtype=float).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not np.all(np.isfinite(y_score_arr)):
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the ROC curve."
+        )
+    positives = np.sum(y_true_arr == 1)
+    negatives = np.sum(y_true_arr == 0)
+    if positives == 0 or negatives == 0:
+        raise ValueError("ROC is undefined when y_true has only one class")
+    order = np.argsort(y_score_arr, kind="mergesort")[::-1]
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    distinct_value_indices = np.where(np.diff(y_score_sorted))[0]
+    threshold_indices = np.r_[distinct_value_indices, y_true_sorted.size - 1]
+    tps = np.cumsum(y_true_sorted)[threshold_indices]
+    fps = (1 + threshold_indices) - tps
+    tps = np.r_[0, tps]
+    fps = np.r_[0, fps]
+    thresholds = np.r_[np.inf, y_score_sorted[threshold_indices]]
+    tpr = tps / positives
+    fpr = fps / negatives
+    return fpr.astype(float), tpr.astype(float), thresholds.astype(float)
+def _roc_curve_cupy(y_true, y_score):
+    import cupy as cp
+    y_true_arr = _as_binary_labels_cupy(y_true, name="y_true")
+    y_score_arr = cp.asarray(y_score, dtype=cp.float64).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not cp.all(cp.isfinite(y_score_arr)).item():
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the ROC curve."
+        )
+    positives = cp.sum(y_true_arr == 1)
+    negatives = cp.sum(y_true_arr == 0)
+    if int(positives.item()) == 0 or int(negatives.item()) == 0:
+        raise ValueError("ROC is undefined when y_true has only one class")
+    order = cp.argsort(y_score_arr)[::-1]
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    distinct_value_indices = cp.where(cp.diff(y_score_sorted) != 0)[0]
+    threshold_indices = cp.concatenate(
+        [distinct_value_indices, cp.asarray([y_true_sorted.size - 1], dtype=distinct_value_indices.dtype)]
+    )
+    tps = cp.cumsum(y_true_sorted)[threshold_indices]
+    fps = (threshold_indices + 1) - tps
+    tps = cp.concatenate([cp.asarray([0], dtype=tps.dtype), tps])
+    fps = cp.concatenate([cp.asarray([0], dtype=fps.dtype), fps])
+    thresholds = cp.concatenate([cp.asarray([cp.inf], dtype=y_score_sorted.dtype), y_score_sorted[threshold_indices]])
+    tpr = tps.astype(cp.float64) / positives.astype(cp.float64)
+    fpr = fps.astype(cp.float64) / negatives.astype(cp.float64)
+    return fpr, tpr, thresholds
+def _roc_curve_torch(y_true, y_score):
+    import torch
+    y_true_arr = _as_binary_labels_torch(y_true, name="y_true")
+    y_score_arr = torch.as_tensor(y_score, dtype=torch.float64, device=y_true_arr.device).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not torch.all(torch.isfinite(y_score_arr)).item():
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the ROC curve."
+        )
+    positives = torch.sum(y_true_arr == 1)
+    negatives = torch.sum(y_true_arr == 0)
+    if int(positives.item()) == 0 or int(negatives.item()) == 0:
+        raise ValueError("ROC is undefined when y_true has only one class")
+    order = torch.argsort(y_score_arr, descending=True)
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    diff = y_score_sorted[1:] - y_score_sorted[:-1]
+    distinct_value_indices = torch.nonzero(diff != 0, as_tuple=False).reshape(-1)
+    threshold_indices = torch.cat(
+        [
+            distinct_value_indices,
+            torch.tensor([y_true_sorted.numel() - 1], device=y_true_sorted.device, dtype=torch.long),
+        ]
+    )
+    tps = torch.cumsum(y_true_sorted, dim=0)[threshold_indices]
+    fps = (threshold_indices + 1) - tps
+    tps = torch.cat([torch.zeros(1, device=tps.device, dtype=tps.dtype), tps])
+    fps = torch.cat([torch.zeros(1, device=fps.device, dtype=fps.dtype), fps])
+    thresholds = torch.cat(
+        [
+            torch.tensor([float("inf")], device=y_score_sorted.device, dtype=y_score_sorted.dtype),
+            y_score_sorted[threshold_indices],
+        ]
+    )
+    tpr = tps.to(torch.float64) / positives.to(torch.float64)
+    fpr = fps.to(torch.float64) / negatives.to(torch.float64)
+    return fpr, tpr, thresholds
+def _roc_auc_from_curve(backend: str, fpr, tpr):
+    if backend == "numpy":
+        if hasattr(np, "trapezoid"):
+            return float(np.trapezoid(tpr, fpr))
+        return float(np.trapz(tpr, fpr))
+    if backend == "cupy":
+        import cupy as cp
+        if hasattr(cp, "trapezoid"):
+            return cp.trapezoid(tpr, fpr)
+        return cp.trapz(tpr, fpr)
+    import torch
+    if hasattr(torch, "trapezoid"):
+        return torch.trapezoid(tpr, fpr)
+    return torch.trapz(tpr, fpr)
+def _precision_recall_curve_numpy(y_true, y_score):
+    y_true_arr = _as_binary_labels_numpy(y_true, name="y_true")
+    y_score_arr = np.asarray(y_score, dtype=float).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not np.all(np.isfinite(y_score_arr)):
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the precision-recall curve."
+        )
+    positives = np.sum(y_true_arr == 1)
+    if positives == 0:
+        raise ValueError("Precision-recall is undefined when y_true has no positive class")
+    order = np.argsort(y_score_arr, kind="mergesort")[::-1]
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    distinct_value_indices = np.where(np.diff(y_score_sorted))[0]
+    threshold_indices = np.r_[distinct_value_indices, y_true_sorted.size - 1]
+    tps = np.cumsum(y_true_sorted)[threshold_indices]
+    fps = (1 + threshold_indices) - tps
+    precision = np.divide(tps, tps + fps, out=np.ones_like(tps, dtype=float), where=(tps + fps) != 0)
+    recall = tps / positives
+    thresholds = y_score_sorted[threshold_indices]
+    precision = np.r_[1.0, precision]
+    recall = np.r_[0.0, recall]
+    thresholds = np.r_[np.inf, thresholds]
+    return precision.astype(float), recall.astype(float), thresholds.astype(float)
+def _precision_recall_curve_cupy(y_true, y_score):
+    import cupy as cp
+    y_true_arr = _as_binary_labels_cupy(y_true, name="y_true")
+    y_score_arr = cp.asarray(y_score, dtype=cp.float64).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not cp.all(cp.isfinite(y_score_arr)).item():
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the precision-recall curve."
+        )
+    positives = cp.sum(y_true_arr == 1)
+    if int(positives.item()) == 0:
+        raise ValueError("Precision-recall is undefined when y_true has no positive class")
+    order = cp.argsort(y_score_arr)[::-1]
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    distinct_value_indices = cp.where(cp.diff(y_score_sorted) != 0)[0]
+    threshold_indices = cp.concatenate(
+        [distinct_value_indices, cp.asarray([y_true_sorted.size - 1], dtype=distinct_value_indices.dtype)]
+    )
+    tps = cp.cumsum(y_true_sorted)[threshold_indices]
+    fps = (threshold_indices + 1) - tps
+    denom = (tps + fps).astype(cp.float64)
+    safe_denom = cp.where(denom != 0, denom, cp.asarray(1.0, dtype=cp.float64))
+    precision = tps.astype(cp.float64) / safe_denom
+    precision = cp.where(denom != 0, precision, cp.ones_like(precision))
+    recall = tps.astype(cp.float64) / positives.astype(cp.float64)
+    thresholds = y_score_sorted[threshold_indices]
+    precision = cp.concatenate([cp.asarray([1.0], dtype=cp.float64), precision])
+    recall = cp.concatenate([cp.asarray([0.0], dtype=cp.float64), recall])
+    thresholds = cp.concatenate([cp.asarray([cp.inf], dtype=y_score_sorted.dtype), thresholds])
+    return precision, recall, thresholds
+def _precision_recall_curve_torch(y_true, y_score):
+    import torch
+    y_true_arr = _as_binary_labels_torch(y_true, name="y_true")
+    y_score_arr = torch.as_tensor(y_score, dtype=torch.float64, device=y_true_arr.device).reshape(-1)
+    if y_true_arr.shape[0] != y_score_arr.shape[0]:
+        raise ValueError("y_true and y_score must have the same length")
+    if not torch.all(torch.isfinite(y_score_arr)).item():
+        raise ValueError(
+            "y_score contains non-finite values (NaN or inf). "
+            "All scores must be finite to compute the precision-recall curve."
+        )
+    positives = torch.sum(y_true_arr == 1)
+    if int(positives.item()) == 0:
+        raise ValueError("Precision-recall is undefined when y_true has no positive class")
+    order = torch.argsort(y_score_arr, descending=True)
+    y_true_sorted = y_true_arr[order]
+    y_score_sorted = y_score_arr[order]
+    diff = y_score_sorted[1:] - y_score_sorted[:-1]
+    distinct_value_indices = torch.nonzero(diff != 0, as_tuple=False).reshape(-1)
+    threshold_indices = torch.cat(
+        [
+            distinct_value_indices,
+            torch.tensor([y_true_sorted.numel() - 1], device=y_true_sorted.device, dtype=torch.long),
+        ]
+    )
+    tps = torch.cumsum(y_true_sorted, dim=0)[threshold_indices]
+    fps = (threshold_indices + 1) - tps
+    denom = (tps + fps).to(torch.float64)
+    safe_denom = torch.where(denom != 0, denom, torch.ones_like(denom))
+    precision = tps.to(torch.float64) / safe_denom
+    precision = torch.where(denom != 0, precision, torch.ones_like(precision))
+    recall = tps.to(torch.float64) / positives.to(torch.float64)
+    thresholds = y_score_sorted[threshold_indices]
+    precision = torch.cat([torch.tensor([1.0], device=precision.device, dtype=precision.dtype), precision])
+    recall = torch.cat([torch.tensor([0.0], device=recall.device, dtype=recall.dtype), recall])
+    thresholds = torch.cat(
+        [torch.tensor([float("inf")], device=thresholds.device, dtype=thresholds.dtype), thresholds]
+    )
+    return precision, recall, thresholds
+def _average_precision_from_curve(backend: str, precision, recall):
+    if backend == "numpy":
+        return float(np.sum(np.diff(recall) * precision[1:]))
+    if backend == "cupy":
+        import cupy as cp
+        return cp.sum(cp.diff(recall) * precision[1:])
+    import torch
+    return torch.sum((recall[1:] - recall[:-1]) * precision[1:])
+def binary_confusion_matrix(y_true, y_pred, backend: str = "auto"):
+    backend_name = _resolve_backend(backend, y_true, y_pred)
+    if backend_name == "numpy":
+        return _binary_confusion_numpy(y_true, y_pred)
+    if backend_name == "cupy":
+        return _binary_confusion_cupy(y_true, y_pred)
+    return _binary_confusion_torch(y_true, y_pred)
+def binary_classification_table(y_true, y_pred, backend: str = "auto") -> Dict[str, Any]:
+    backend_name = _resolve_backend(backend, y_true, y_pred)
+    if backend_name == "numpy":
+        return _classification_table_numpy(y_true, y_pred)
+    if backend_name == "cupy":
+        return _classification_table_cupy(y_true, y_pred)
+    return _classification_table_torch(y_true, y_pred)
+def binary_roc_curve(y_true, y_score, backend: str = "auto"):
+    backend_name = _resolve_backend(backend, y_true, y_score)
+    if backend_name == "numpy":
+        return _roc_curve_numpy(y_true, y_score)
+    if backend_name == "cupy":
+        return _roc_curve_cupy(y_true, y_score)
+    return _roc_curve_torch(y_true, y_score)
+def binary_roc_auc_score(y_true, y_score, backend: str = "auto"):
+    backend_name = _resolve_backend(backend, y_true, y_score)
+    fpr, tpr, _ = binary_roc_curve(y_true, y_score, backend=backend_name)
+    return _roc_auc_from_curve(backend_name, fpr, tpr)
+def binary_precision_recall_curve(y_true, y_score, backend: str = "auto"):
+    backend_name = _resolve_backend(backend, y_true, y_score)
+    if backend_name == "numpy":
+        return _precision_recall_curve_numpy(y_true, y_score)
+    if backend_name == "cupy":
+        return _precision_recall_curve_cupy(y_true, y_score)
+    return _precision_recall_curve_torch(y_true, y_score)
+def binary_average_precision_score(y_true, y_score, backend: str = "auto"):
+    backend_name = _resolve_backend(backend, y_true, y_score)
+    precision, recall, _ = binary_precision_recall_curve(y_true, y_score, backend=backend_name)
+    return _average_precision_from_curve(backend_name, precision, recall)
+def evaluate_binary_classification(
+    y_true,
+    y_score,
+    threshold: float = 0.5,
+    include_curves: bool = True,
+    backend: str = "auto",
+) -> Dict[str, Any]:
+    """
+    One-shot binary evaluation from external class-1 probabilities.
+    Parameters
+    ----------
+    y_true : array-like
+        Binary labels encoded as 0/1.
+    y_score : array-like
+        Predicted probabilities for positive class.
+    threshold : float, default=0.5
+        Threshold used for hard predictions in confusion/table metrics.
+    include_curves : bool, default=True
+        Whether to include full ROC/PR curve arrays.
+    backend : {'auto', 'numpy', 'cupy', 'torch'}, default='auto'
+        Backend selection. ``'auto'`` is inferred from input arrays.
+    Returns
+    -------
+    dict
+        Batch evaluation dictionary.
+    """
+    if threshold < 0.0 or threshold > 1.0:
+        raise ValueError("threshold must be in [0, 1]")
+    backend_name = _resolve_backend(backend, y_true, y_score)
+    if backend_name == "numpy":
+        y_score_arr = np.asarray(y_score, dtype=float).reshape(-1)
+        if not np.all(np.isfinite(y_score_arr)):
+            raise ValueError(
+                "y_score contains non-finite values (NaN or inf). "
+                "Ensure all predicted probabilities are finite before calling evaluate_binary_classification."
+            )
+        y_pred = (y_score_arr >= threshold).astype(np.int64)
+    elif backend_name == "cupy":
+        import cupy as cp
+        y_score_arr = cp.asarray(y_score, dtype=cp.float64).reshape(-1)
+        if not cp.all(cp.isfinite(y_score_arr)).item():
+            raise ValueError(
+                "y_score contains non-finite values (NaN or inf). "
+                "Ensure all predicted probabilities are finite before calling evaluate_binary_classification."
+            )
+        y_pred = (y_score_arr >= threshold).astype(cp.int64)
+    else:
+        import torch
+        y_true_t = torch.as_tensor(y_true)
+        y_score_arr = torch.as_tensor(y_score, dtype=torch.float64, device=y_true_t.device).reshape(-1)
+        if not torch.all(torch.isfinite(y_score_arr)).item():
+            raise ValueError(
+                "y_score contains non-finite values (NaN or inf). "
+                "Ensure all predicted probabilities are finite before calling evaluate_binary_classification."
+            )
+        y_pred = (y_score_arr >= threshold).to(dtype=torch.int64)
+    result: Dict[str, Any] = {
+        "threshold": float(threshold),
+        "confusion_matrix": binary_confusion_matrix(y_true, y_pred, backend=backend_name),
+        "classification_table": binary_classification_table(y_true, y_pred, backend=backend_name),
+    }
+    fpr, tpr, roc_thresholds = binary_roc_curve(y_true, y_score_arr, backend=backend_name)
+    precision, recall, pr_thresholds = binary_precision_recall_curve(y_true, y_score_arr, backend=backend_name)
+    result["roc_auc"] = _roc_auc_from_curve(backend_name, fpr, tpr)
+    result["average_precision"] = _average_precision_from_curve(backend_name, precision, recall)
+    if include_curves:
+        result["roc_curve"] = {
+            "fpr": fpr,
+            "tpr": tpr,
+            "thresholds": roc_thresholds,
+        }
+        result["precision_recall_curve"] = {
+            "precision": precision,
+            "recall": recall,
+            "thresholds": pr_thresholds,
+        }
+    return result

statgpu/nonparametric/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""Nonparametric estimators."""
+# Kernel smoothing (KDE + Nadaraya-Watson kernel regression)
+from .kernel_smoothing import (
+    BandwidthSelectionResult,
+    select_bandwidth,
+    select_bandwidth_factor,
+    KernelDensityEstimator,
+    KDE,
+    KDEBootstrapResult,
+    fit_kde,
+    kde_pdf,
+    kde_confidence_interval,
+    kde_bootstrap_confidence_interval,
+    KernelRegression,
+    KernelRegressionRegressor,
+    fit_kernel_regression,
+    kernel_regression_predict,
+)
+# Kernel ridge regression
+from .kernel_methods import KernelRidge, KernelRidgeCV, pairwise_kernels
+# Spline basis functions
+from .splines import bspline_basis, natural_cubic_spline_basis
+# Public API of this package: exactly the names imported above from
+# .kernel_smoothing, .kernel_methods and .splines, grouped the same way.
+__all__ = [
+    # Kernel smoothing
+    "BandwidthSelectionResult",
+    "select_bandwidth",
+    "select_bandwidth_factor",
+    "KernelDensityEstimator",
+    "KDE",
+    "KDEBootstrapResult",
+    "fit_kde",
+    "kde_pdf",
+    "kde_confidence_interval",
+    "kde_bootstrap_confidence_interval",
+    "KernelRegression",
+    "KernelRegressionRegressor",
+    "fit_kernel_regression",
+    "kernel_regression_predict",
+    # Kernel methods
+    "KernelRidge",
+    "KernelRidgeCV",
+    "pairwise_kernels",
+    # Splines
+    "bspline_basis",
+    "natural_cubic_spline_basis",
+]

statgpu/nonparametric/kernel_methods/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Kernel methods with GPU acceleration."""
+from ._kernels import (
+    rbf_kernel,
+    polynomial_kernel,
+    linear_kernel,
+    laplacian_kernel,
+    sigmoid_kernel,
+    cosine_kernel,
+    pairwise_kernels,
+)
+from ._krr import KernelRidge
+from ._krr_cv import KernelRidgeCV
+# Public API of this package: the kernel functions imported from ._kernels
+# followed by the two estimators imported from ._krr and ._krr_cv.
+__all__ = [
+    "rbf_kernel",
+    "polynomial_kernel",
+    "linear_kernel",
+    "laplacian_kernel",
+    "sigmoid_kernel",
+    "cosine_kernel",
+    "pairwise_kernels",
+    "KernelRidge",
+    "KernelRidgeCV",
+]