PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168)

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/nonparametric/splines/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Spline basis function utilities."""
+from ._bspline_basis import bspline_basis, natural_cubic_spline_basis
+__all__ = ['bspline_basis', 'natural_cubic_spline_basis']

statgpu/nonparametric/splines/_bspline_basis.py ADDED Viewed

@@ -0,0 +1,336 @@
+"""
+B-spline and natural cubic spline basis construction with GPU support.
+Implements De Boor's recursive algorithm for B-spline basis evaluation,
+vectorized over sample points for efficient GPU computation.
+"""
+from __future__ import annotations
+import numpy as np
+from statgpu.backends import _LINALG_ERRORS, _torch_dev, _to_numpy, xp_zeros, xp_eye, xp_full, xp_astype, xp_asarray
+def _get_xp(xp):
+    """Resolve the array namespace, falling back to numpy when ``xp`` is None."""
+    if xp is None:
+        return np
+    return xp
+def bspline_basis(x, knots, degree=3, xp=None, boundary_lo=None, boundary_hi=None):
+    """
+    Build the B-spline design matrix for the sample points ``x``.
+    The basis is produced by the Cox-de Boor recursion. Each recursion level
+    is evaluated for all sample points and all basis functions at once, so
+    only the loop over the degree remains sequential.
+    Parameters
+    ----------
+    x : array-like, shape (n,)
+        Points at which the basis is evaluated (flattened internally).
+    knots : array-like, shape (m,)
+        Interior knots. They must lie strictly between the boundary knots
+        and are expected to be increasing (ordering is not checked here).
+    degree : int, default=3
+        Polynomial degree of the spline (3 = cubic).
+    xp : module, optional
+        Array module (numpy, cupy, or torch). numpy is used when None.
+    boundary_lo : float, optional
+        Lower boundary knot. Defaults to ``min(min(x), min(knots))``.
+    boundary_hi : float, optional
+        Upper boundary knot. Defaults to ``max(max(x), max(knots))``.
+    Returns
+    -------
+    B : array, shape (n, m + degree + 1)
+        One row per sample point, one column per basis function. Points
+        outside ``[boundary_lo, boundary_hi]`` produce all-zero rows.
+    Raises
+    ------
+    ValueError
+        If ``knots`` is empty, or if an interior knot touches or lies
+        outside the boundary range.
+    """
+    xp = _get_xp(xp)
+    x = xp.asarray(x, dtype=xp.float64).ravel()
+    knots = xp.asarray(knots, dtype=xp.float64).ravel()
+    n_points = x.shape[0]
+    if knots.shape[0] == 0:
+        raise ValueError("At least one interior knot is required")
+    knot_min = float(xp.min(knots))
+    knot_max = float(xp.max(knots))
+    # When a boundary is not supplied, fall back to the wider of the data range
+    # and the knot range (callers pass the training range at prediction time).
+    if boundary_lo is None:
+        boundary_lo = min(float(xp.min(x)), knot_min)
+    if boundary_hi is None:
+        boundary_hi = max(float(xp.max(x)), knot_max)
+    if boundary_lo >= knot_min or boundary_hi <= knot_max:
+        raise ValueError(
+            "Interior knots must be strictly within the boundary range "
+            f"({boundary_lo}, {boundary_hi})"
+        )
+    # Clamped knot vector: each boundary knot is repeated degree + 1 times.
+    n_repeat = degree + 1
+    t = xp.concatenate([
+        xp_full(n_repeat, boundary_lo, xp.float64, xp, x),
+        knots,
+        xp_full(n_repeat, boundary_hi, xp.float64, xp, x),
+    ])
+    # Single transfer to host; the loops below read knots as Python floats.
+    t_cpu = _to_numpy(t).tolist()
+    n_intervals = len(t) - 1
+    B = xp_zeros((n_points, n_intervals), xp.float64, xp, x)
+    # Degree 0: indicator of every non-empty knot interval [t_j, t_{j+1}).
+    nonempty = [j for j in range(n_intervals) if t_cpu[j + 1] > t_cpu[j]]
+    closed_interval = nonempty[-1] if nonempty else -1
+    for j in nonempty:
+        lower, upper = t_cpu[j], t_cpu[j + 1]
+        if j == closed_interval:
+            # The last non-empty interval is closed so x == boundary_hi is covered.
+            inside = (x >= lower) & (x <= upper)
+        else:
+            inside = (x >= lower) & (x < upper)
+        B[:, j] = xp_astype(inside, xp.float64, xp)
+    # Raise the degree one level at a time; level k depends on level k - 1.
+    for k in range(1, degree + 1):
+        width = n_intervals - k
+        # Knot arrays for all `width` basis functions, created next to x.
+        t_i = xp_asarray(t_cpu[:width], dtype=xp.float64, xp=xp, ref_arr=x)
+        t_i_k = xp_asarray(t_cpu[k:k + width], dtype=xp.float64, xp=xp, ref_arr=x)
+        t_i1 = xp_asarray(t_cpu[1:width + 1], dtype=xp.float64, xp=xp, ref_arr=x)
+        t_i1_k = xp_asarray(t_cpu[k + 1:k + 1 + width], dtype=xp.float64, xp=xp, ref_arr=x)
+        span_left = t_i_k - t_i
+        span_right = t_i1_k - t_i1
+        left_ok = span_left > 0
+        right_ok = span_right > 0
+        # Zero-width spans get a dummy divisor of 1; their weights are masked to 0.
+        div_left = xp.where(left_ok, span_left, 1.0)
+        div_right = xp.where(right_ok, span_right, 1.0)
+        # Weights have shape (n, width): x broadcast against the knot arrays.
+        w_left = xp.where(left_ok[None, :], (x[:, None] - t_i[None, :]) / div_left[None, :], 0.0)
+        w_right = xp.where(right_ok[None, :], (x[:, None] - t_i1[None, :]) / div_right[None, :], 0.0)
+        B = w_left * B[:, :width] + (1.0 - w_right) * B[:, 1:width + 1]
+    # After `degree` levels exactly m + degree + 1 columns remain.
+    return B
+def natural_cubic_spline_basis(x, knots, xp=None):
+    """
+    Natural cubic spline basis (second derivative ~ 0 at the data boundaries).
+    A clamped cubic B-spline basis is built on ``x`` and restricted to the
+    coefficient subspace whose second derivative vanishes at ``min(x)`` and
+    ``max(x)``. The second derivatives are estimated with one-sided finite
+    differences using a fixed step of 1e-6, so the constraints hold only
+    approximately and the step is not rescaled to the range of ``x``.
+    Parameters
+    ----------
+    x : array-like, shape (n,)
+        Evaluation points.
+    knots : array-like, shape (m,)
+        Interior knots (must be strictly increasing).
+    xp : module, optional
+        Array module (numpy, cupy, or torch). If None, uses numpy.
+    Returns
+    -------
+    B : array, shape (n, m + 2)
+        The (n, m + 4) cubic B-spline basis multiplied by an orthonormal
+        basis of the constraint null space. Two columns are removed when
+        both constraints are numerically independent; fewer are removed on
+        the SVD path if the constraints are rank-deficient.
+    Raises
+    ------
+    ValueError
+        If ``knots`` is empty, or (from ``bspline_basis``) if a knot does not
+        lie strictly inside ``(min(x), max(x))``.
+    """
+    xp = _get_xp(xp)
+    x = xp.asarray(x, dtype=xp.float64).ravel()
+    knots = xp.asarray(knots, dtype=xp.float64).ravel()
+    if knots.shape[0] < 1:
+        raise ValueError("At least one interior knot is required for natural cubic splines")
+    x_min = float(xp.min(x))
+    x_max = float(xp.max(x))
+    # Unconstrained cubic B-spline basis; its boundary knots default to the
+    # range of x because the interior knots must lie strictly inside it.
+    B_cubic = bspline_basis(x, knots, degree=3, xp=xp)
+    # Finite-difference stencils: forward at x_min, backward at x_max.
+    # All six points are evaluated in one call; they span [x_min, x_max], so
+    # the boundary knots match those used for B_cubic.
+    eps = 1e-6
+    x_eval = xp_asarray([x_min, x_min + eps, x_min + 2 * eps,
+                         x_max, x_max - eps, x_max - 2 * eps],
+                        dtype=xp.float64, xp=xp, ref_arr=x)
+    B_eval = bspline_basis(x_eval, knots, degree=3, xp=xp)
+    B_lo = B_eval[0:1, :]
+    B_lo_eps = B_eval[1:2, :]
+    B_lo_eps2 = B_eval[2:3, :]
+    B_hi = B_eval[3:4, :]
+    B_hi_eps = B_eval[4:5, :]
+    B_hi_eps2 = B_eval[5:6, :]
+    d2_lo = (B_lo_eps2 - 2 * B_lo_eps + B_lo) / (eps ** 2)
+    d2_hi = (B_hi_eps2 - 2 * B_hi_eps + B_hi) / (eps ** 2)
+    # Constraint matrix C (2, n_basis): admissible coefficients satisfy C @ beta = 0.
+    C = xp.vstack([d2_lo, d2_hi])
+    try:
+        # Full SVD: rows of Vh beyond the numerical rank span the null space of C.
+        U, S_vals, Vh = xp.linalg.svd(C)
+        n_rank = int(xp.sum(S_vals > max(C.shape) * S_vals[0] * xp.finfo(xp.float64).eps))
+        null_space = Vh[n_rank:].T  # shape: (n_basis, n_basis - n_rank)
+    except _LINALG_ERRORS:
+        # Fallback: complete QR of C^T. Since C^T = Q R with R upper triangular,
+        # range(C^T) lies in the span of the first C.shape[0] columns of Q, so
+        # the remaining columns of Q are orthogonal to every row of C.
+        Q_full, _ = xp.linalg.qr(C.T, mode='complete')
+        null_space = Q_full[:, C.shape[0]:]
+    # Restrict the cubic basis to the constrained subspace.
+    B_natural = B_cubic @ null_space
+    return B_natural
+def _bspline_basis_derivative(x, knots, degree=3, deriv_order=1, xp=None):
+    """
+    Evaluate the ``deriv_order``-th derivative of the B-spline basis.
+    Applies the B-spline derivative recurrence
+    B'_{i,k}(x) = k/(t_{i+k} - t_i) * B_{i,k-1}(x) - k/(t_{i+k+1} - t_{i+1}) * B_{i+1,k-1}(x)
+    once per derivative order, starting from the basis of degree
+    ``degree - deriv_order`` expressed on the full clamped knot vector ``t``
+    (boundary knots repeated ``degree + 1`` times). Terms whose knot span is
+    zero are treated as zero.
+    Parameters
+    ----------
+    x : array-like, shape (n,)
+        Evaluation points.
+    knots : array-like, shape (m,)
+        Interior knots.
+    degree : int, default=3
+        Spline degree.
+    deriv_order : int, default=1
+        Order of derivative. Orders above ``degree`` give an all-zero matrix.
+    xp : module, optional
+        Array module.
+    Returns
+    -------
+    dB : array, shape (n, m + degree + 1)
+        Derivative of each basis function at each point; columns are in the
+        same order as the columns of ``bspline_basis``.
+    Raises
+    ------
+    ValueError
+        If ``deriv_order`` is negative, or (from ``bspline_basis``) if the
+        interior knots do not lie strictly inside the range of ``x``.
+    """
+    xp = _get_xp(xp)
+    if deriv_order < 0:
+        raise ValueError("deriv_order must be >= 0")
+    if deriv_order > degree:
+        return xp_zeros((len(x), len(knots) + degree + 1), xp.float64, xp, x)
+    if deriv_order == 0:
+        return bspline_basis(x, knots, degree=degree, xp=xp)
+    x = xp_asarray(x, dtype=xp.float64, xp=xp).ravel()
+    knots = xp_asarray(knots, dtype=xp.float64, xp=xp, ref_arr=x).ravel()
+    # Same default boundary logic as bspline_basis.
+    boundary_lo = min(float(xp.min(x)), float(xp.min(knots)))
+    boundary_hi = max(float(xp.max(x)), float(xp.max(knots)))
+    # Full clamped knot vector of the degree-`degree` spline, kept on the host
+    # because it is only used to compute scalar coefficients.
+    t_cpu = ([boundary_lo] * (degree + 1)
+             + _to_numpy(knots).tolist()
+             + [boundary_hi] * (degree + 1))
+    reduced_degree = degree - deriv_order
+    # bspline_basis pads its knot vector with only reduced_degree + 1 boundary
+    # copies. On the full vector `t` the same functions are shifted right by
+    # deriv_order positions, and the extra functions on either side have all
+    # their knots equal to a boundary, i.e. they are identically zero.
+    B_reduced = bspline_basis(x, knots, degree=reduced_degree, xp=xp,
+                              boundary_lo=boundary_lo, boundary_hi=boundary_hi)
+    n_points, n_reduced = B_reduced.shape[0], B_reduced.shape[1]
+    dB = xp_zeros((n_points, n_reduced + 2 * deriv_order), xp.float64, xp, x)
+    dB[:, deriv_order:deriv_order + n_reduced] = B_reduced
+    # Each pass raises the degree by one and the derivative order by one;
+    # the number of columns drops by one per pass, ending at m + degree + 1.
+    for k in range(reduced_degree + 1, degree + 1):
+        n_out = dB.shape[1] - 1
+        lead = [
+            k / (t_cpu[i + k] - t_cpu[i]) if t_cpu[i + k] > t_cpu[i] else 0.0
+            for i in range(n_out)
+        ]
+        trail = [
+            k / (t_cpu[i + k + 1] - t_cpu[i + 1]) if t_cpu[i + k + 1] > t_cpu[i + 1] else 0.0
+            for i in range(n_out)
+        ]
+        c_lead = xp_asarray(lead, dtype=xp.float64, xp=xp, ref_arr=x)
+        c_trail = xp_asarray(trail, dtype=xp.float64, xp=xp, ref_arr=x)
+        dB = c_lead[None, :] * dB[:, :n_out] - c_trail[None, :] * dB[:, 1:n_out + 1]
+    return dB

statgpu/nonparametric/splines/_penalized.py ADDED Viewed

@@ -0,0 +1,349 @@
+"""
+Penalized least squares utilities for spline smoothing.
+Provides functions for solving penalized regression problems and
+constructing difference penalty matrices for spline smoothing.
+"""
+from __future__ import annotations
+import numpy as np
+from statgpu.backends import _LINALG_ERRORS, _torch_dev, _to_numpy, xp_zeros, xp_eye, xp_asarray, xp_cholesky_solve
+def _get_xp(xp):
+    """Pick the array module to use; numpy is the default when ``xp`` is None."""
+    return np if xp is None else xp
+def difference_penalty(order, n_coef, xp=None):
+    """
+    Build the difference penalty matrix S = D^T @ D of a given order.
+    D is the ``order``-th forward-difference operator acting on the
+    coefficient vector, so beta^T @ S @ beta is the sum of squared
+    ``order``-th differences of neighbouring coefficients.
+    Parameters
+    ----------
+    order : int
+        Difference order (>= 1). order=1 rows look like [-1, 1]; order=2
+        rows look like [1, -2, 1].
+    n_coef : int
+        Number of spline coefficients (basis functions); must exceed ``order``.
+    xp : module, optional
+        Array module (numpy, cupy, or torch). If None, uses numpy.
+    Returns
+    -------
+    S : array, shape (n_coef, n_coef)
+        Penalty matrix (positive semi-definite).
+    Raises
+    ------
+    ValueError
+        If ``order`` < 1 or ``n_coef`` <= ``order``.
+    """
+    xp = _get_xp(xp)
+    if order < 1:
+        raise ValueError("Penalty order must be >= 1")
+    if n_coef <= order:
+        raise ValueError(f"n_coef ({n_coef}) must be greater than order ({order})")
+    # Start from the identity and difference its rows `order` times; each pass
+    # drops one row, leaving an operator of shape (n_coef - order, n_coef).
+    diff_op = xp_eye(n_coef, xp.float64, xp)
+    for _pass in range(order):
+        diff_op = diff_op[1:, :] - diff_op[:-1, :]
+    return diff_op.T @ diff_op
+def penalized_ls(B, y, penalty_matrix, lambda_, xp=None):
+    """
+    Solve penalized least squares problem.
+    Minimizes: ||y - B @ beta||^2 + lambda_ * beta^T @ S @ beta
+    Parameters
+    ----------
+    B : array, shape (n, p)
+        Basis matrix (design matrix for the spline).
+    y : array, shape (n,) or (n, q)
+        Response vector, or matrix with one response per column.
+    penalty_matrix : array, shape (p, p)
+        Penalty matrix S (positive semi-definite).
+    lambda_ : float
+        Smoothing parameter (expected to be non-negative; not checked, so
+        that a device scalar can be passed without a host sync).
+    xp : module, optional
+        Array module (numpy, cupy, or torch). If None, uses numpy.
+    Returns
+    -------
+    beta : array, shape (p,) or (p, q)
+        Fitted coefficients. Flattened to (p,) whenever there is a single
+        response column, including when ``y`` was passed as (n, 1).
+    edf : scalar
+        Effective degrees of freedom, trace(A^{-1} @ B^T @ B) with
+        A = B^T @ B + lambda_ * S, clamped to [0, p]. This is an array-module
+        scalar, or the Python float ``p`` when the trace solve fails.
+    """
+    xp = _get_xp(xp)
+    B = xp_asarray(B, dtype=xp.float64, xp=xp)
+    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)
+    penalty_matrix = xp_asarray(penalty_matrix, dtype=xp.float64, xp=xp, ref_arr=B)
+    if y.ndim == 1:
+        y = y.reshape(-1, 1)
+    _, p = B.shape
+    # Normal equations: (B^T @ B + lambda_ * S) @ beta = B^T @ y
+    BtB = B.T @ B
+    Bty = B.T @ y
+    A = BtB + lambda_ * penalty_matrix
+    # Matrix that actually produced beta; the edf is computed from the same one.
+    A_used = A
+    try:
+        # Small relative jitter keeps the Cholesky factorization stable.
+        jitter = 1e-10 * xp.trace(A) / p
+        A_stable = A + jitter * xp_eye(p, xp.float64, xp, A)
+        beta = xp_cholesky_solve(A_stable, Bty, xp)
+    except _LINALG_ERRORS:
+        # Cholesky failed: beta comes from the un-jittered A, so A_used stays A.
+        try:
+            beta = xp.linalg.solve(A, Bty)
+        except _LINALG_ERRORS:
+            # Last resort: least squares
+            beta = xp.linalg.lstsq(A, Bty, rcond=None)[0]
+    else:
+        # Record the jittered matrix only once the Cholesky solve succeeded.
+        A_used = A_stable
+    # Effective degrees of freedom: edf = tr(A^{-1} @ B^T @ B)
+    try:
+        A_inv_BtB = xp.linalg.solve(A_used, BtB)
+        edf = xp.trace(A_inv_BtB)
+        # Clamp to the valid range [0, p] without leaving the device.
+        if hasattr(edf, 'clamp'):  # torch
+            edf = edf.clamp(0.0, float(p))
+        else:  # numpy/cupy
+            edf = xp.clip(edf, 0.0, float(p))
+    except _LINALG_ERRORS:
+        edf = float(p)
+    # A single response column is returned as a flat coefficient vector.
+    if y.shape[1] == 1:
+        beta = beta.ravel()
+    return beta, edf
+def generalized_cross_validation(B, y, penalty_matrix, lambda_, xp=None):
+    """
+    Compute Generalized Cross-Validation (GCV) score.
+    GCV = n * RSS / (n - edf)^2
+    where RSS is the residual sum of squares and edf is the effective
+    degrees of freedom, both taken from ``penalized_ls``.
+    Parameters
+    ----------
+    B : array, shape (n, p)
+        Basis matrix.
+    y : array, shape (n,) or (n, 1)
+        Response vector. A single-column matrix is treated as a vector.
+    penalty_matrix : array, shape (p, p)
+        Penalty matrix.
+    lambda_ : float
+        Smoothing parameter.
+    xp : module, optional
+        Array module.
+    Returns
+    -------
+    gcv : scalar
+        GCV score (lower is better); infinite when ``1 - edf / n`` is not
+        above 1e-10. Returned as an array-module scalar when edf is one.
+    """
+    xp = _get_xp(xp)
+    B = xp_asarray(B, dtype=xp.float64, xp=xp)
+    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)
+    beta, edf = penalized_ls(B, y, penalty_matrix, lambda_, xp)
+    # penalized_ls returns a flat beta for a single response column, so a
+    # column-vector y must be flattened as well; otherwise (n, 1) - (n,)
+    # broadcasts to an (n, n) matrix and the RSS is wrong.
+    if y.ndim == 2 and y.shape[1] == 1:
+        y = y.reshape(-1)
+    resid = y - B @ beta
+    n = len(y)
+    rss = xp.sum(resid ** 2)  # stays an array-module scalar
+    # Guard against a zero or negative denominator (edf >= n).
+    denom = 1.0 - edf / n
+    if hasattr(denom, 'item'):  # array-module scalar: select without branching on its value
+        _inf = xp.tensor(float('inf'), dtype=denom.dtype, device=denom.device) if hasattr(xp, 'tensor') else float('inf')
+        gcv = xp.where(denom > 1e-10, rss / n / (denom ** 2), _inf)
+    else:
+        gcv = rss / n / (denom ** 2) if denom > 1e-10 else float('inf')
+    return gcv
+def select_lambda_gcv(B, y, penalty_matrix, lambda_grid=None, xp=None):
+    """
+    Choose the smoothing parameter that minimizes the GCV score.
+    Every value in ``lambda_grid`` is scored with
+    ``generalized_cross_validation`` and the minimizer is returned.
+    Parameters
+    ----------
+    B : array, shape (n, p)
+        Basis matrix.
+    y : array, shape (n,)
+        Response vector.
+    penalty_matrix : array, shape (p, p)
+        Penalty matrix.
+    lambda_grid : array-like, optional
+        Candidate lambda values. If None, 100 log-spaced values from
+        1e-10 to 1e10 are used.
+    xp : module, optional
+        Array module.
+    Returns
+    -------
+    best_lambda : float
+        Grid value with the smallest GCV score.
+    gcv_scores : array
+        GCV score for every grid value, in grid order.
+    """
+    xp = _get_xp(xp)
+    B = xp_asarray(B, dtype=xp.float64, xp=xp)
+    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)
+    penalty_matrix = xp_asarray(penalty_matrix, dtype=xp.float64, xp=xp, ref_arr=B)
+    if lambda_grid is None:
+        lambda_grid = xp.logspace(-10, 10, 100)
+    lambda_grid = xp_asarray(lambda_grid, dtype=xp.float64, xp=xp, ref_arr=B)
+    # Score every candidate; the per-lambda scores are stacked first and
+    # converted to a host array only once, after the loop.
+    scores = [
+        generalized_cross_validation(B, y, penalty_matrix, lambda_grid[idx], xp)
+        for idx in range(len(lambda_grid))
+    ]
+    scores_host = _to_numpy(xp.stack(scores))
+    winner = int(np.argmin(scores_host))
+    best_lambda = float(_to_numpy(lambda_grid)[winner])
+    gcv_scores = xp_asarray(scores_host, dtype=xp.float64, xp=xp, ref_arr=B)
+    return best_lambda, gcv_scores
+def fit_penalized_spline(x, y, knots, degree=3, penalty_order=2,
+                          lambda_=1.0, xp=None):
+    """
+    Fit a penalized B-spline to (x, y).
+    Builds the B-spline basis on ``x``, a difference penalty of the requested
+    order, and solves the resulting penalized least squares problem.
+    Parameters
+    ----------
+    x : array-like, shape (n,)
+        Predictor variable.
+    y : array-like, shape (n,)
+        Response variable.
+    knots : array-like, shape (m,)
+        Interior knots.
+    degree : int, default=3
+        Spline degree.
+    penalty_order : int, default=2
+        Order of the difference penalty.
+    lambda_ : float, default=1.0
+        Smoothing parameter.
+    xp : module, optional
+        Array module.
+    Returns
+    -------
+    beta : array, shape (n_basis,)
+        Fitted spline coefficients.
+    edf : scalar
+        Effective degrees of freedom as returned by ``penalized_ls``.
+    B : array, shape (n, n_basis)
+        Basis matrix.
+    S : array, shape (n_basis, n_basis)
+        Penalty matrix.
+    """
+    from statgpu.nonparametric.splines._bspline_basis import bspline_basis
+    xp = _get_xp(xp)
+    x_flat = xp.asarray(x, dtype=xp.float64).ravel()
+    y_flat = xp.asarray(y, dtype=xp.float64).ravel()
+    # Design matrix, then a penalty sized to its number of columns.
+    basis = bspline_basis(x_flat, knots, degree=degree, xp=xp)
+    penalty = difference_penalty(penalty_order, basis.shape[1], xp)
+    coef, edf = penalized_ls(basis, y_flat, penalty, lambda_, xp)
+    return coef, edf, basis, penalty
+def predict_penalized_spline(x_new, beta, knots, degree=3, xp=None,
+                             boundary_lo=None, boundary_hi=None):
+    """
+    Evaluate a fitted penalized spline at new points.
+    Parameters
+    ----------
+    x_new : array-like, shape (n_new,)
+        New predictor values.
+    beta : array, shape (n_basis,)
+        Fitted spline coefficients.
+    knots : array-like, shape (m,)
+        Interior knots used for fitting.
+    degree : int, default=3
+        Spline degree.
+    xp : module, optional
+        Array module.
+    boundary_lo : float, optional
+        Lower boundary knot from the training data. When omitted, the basis
+        derives it from ``x_new`` and ``knots``, which may differ from the
+        boundary used at fit time.
+    boundary_hi : float, optional
+        Upper boundary knot from the training data; same caveat as above.
+    Returns
+    -------
+    y_pred : array, shape (n_new,)
+        Predicted values.
+    """
+    from statgpu.nonparametric.splines._bspline_basis import bspline_basis
+    xp = _get_xp(xp)
+    points = xp.asarray(x_new, dtype=xp.float64).ravel()
+    coef = xp.asarray(beta, dtype=xp.float64)
+    # Rebuild the design matrix for the new points with the supplied boundaries.
+    design = bspline_basis(points, knots, degree=degree, xp=xp,
+                           boundary_lo=boundary_lo, boundary_hi=boundary_hi)
+    return design @ coef

statgpu/panel/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""
+Panel data models with GPU acceleration.
+Provides fixed effects and random effects estimators for panel/longitudinal
+data, along with clustered covariance estimators.
+"""
+from ._fixed_effects import PanelOLS
+from ._random_effects import RandomEffects
+from ._covariance import clustered_covariance, two_way_clustered_covariance
+from ._utils import PanelSummary
+__all__ = [
+    'PanelOLS',
+    'RandomEffects',
+    'PanelSummary',
+    'clustered_covariance',
+    'two_way_clustered_covariance',
+]