PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/semiparametric/_gam.py ADDED Viewed

@@ -0,0 +1,401 @@
+"""
+Generalized Additive Model (GAM) with GPU support.
+Implements GAM using penalized B-splines with automatic smoothing
+parameter selection via Generalized Cross-Validation (GCV).
+"""
+from __future__ import annotations
+__all__ = ["GAM"]
+import numpy as np
+from typing import Optional, Union
+from statgpu._base import BaseEstimator
+from statgpu._config import Device
+from statgpu.backends import _torch_dev, _to_numpy, xp_zeros, xp_ones, xp_asarray, xp_copy
+from statgpu.nonparametric.splines._bspline_basis import bspline_basis
+from statgpu.nonparametric.splines._penalized import (
+    difference_penalty,
+    penalized_ls,
+    generalized_cross_validation,
+    select_lambda_gcv,
+)
+class GAM(BaseEstimator):
+    """
+    Generalized Additive Model (GAM) using penalized B-splines.
+    Fits a smooth function for each feature using B-spline basis with
+    a difference penalty for smoothness. Smoothing parameters can be
+    specified or automatically selected via GCV.
+    The model is: y = alpha + sum_j f_j(x_j) + epsilon
+    where each f_j is represented as a penalized B-spline.
+    Parameters
+    ----------
+    n_splines : int, default=20
+        Number of basis functions per feature (before penalty reduction).
+    degree : int, default=3
+        Degree of B-spline basis (3 = cubic).
+    lam : float or None, default=None
+        Smoothing parameter. If None, automatically selected via GCV.
+    penalty_order : int, default=2
+        Order of difference penalty (2 = second differences).
+    device : str or Device, default='auto'
+        Computation device: 'cpu', 'cuda', or 'auto'.
+    n_jobs : int or None, default=None
+        Number of parallel jobs.
+    Attributes
+    ----------
+    coef_ : array, shape (n_features * n_splines + 1,)
+        Fitted coefficients (intercept + spline coefficients for each feature).
+    intercept_ : float
+        Intercept term.
+    edf_ : float
+        Total effective degrees of freedom.
+    gcv_score_ : float
+        GCV score (if lam was auto-selected).
+    lam_ : float
+        Smoothing parameter used (after auto-selection if applicable).
+    knots_ : list of arrays
+        Interior knots for each feature.
+    n_features_ : int
+        Number of features in training data.
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from statgpu.semiparametric import GAM
+    >>> X = np.random.randn(100, 3)
+    >>> y = np.sin(X[:, 0]) + 0.5 * X[:, 1] ** 2 + np.random.randn(100) * 0.1
+    >>> gam = GAM(n_splines=15, lam=1.0)
+    >>> gam.fit(X, y)
+    >>> y_pred = gam.predict(X)
+    """
+    def __init__(
+        self,
+        n_splines: int = 20,
+        degree: int = 3,
+        lam: Optional[float] = None,
+        penalty_order: int = 2,
+        device: Union[str, Device] = Device.AUTO,
+        n_jobs: Optional[int] = None,
+    ):
+        super().__init__(device=device, n_jobs=n_jobs)
+        self.n_splines = n_splines
+        self.degree = degree
+        self.lam = lam
+        self.penalty_order = penalty_order
+        # Fitted attributes
+        self.coef_ = None
+        self.intercept_ = None
+        self.edf_ = None
+        self.gcv_score_ = None
+        self.lam_ = None
+        self.knots_ = None
+        self.n_features_ = None
+    def _get_xp(self):
+        """Get the array module for computation.
+        Returns ``backend.xp`` (the raw array module) so callers can use
+        ``xp.asarray`` etc. directly.  Delegates to the parent's
+        ``_get_backend()`` which returns a ``BackendBase`` with correct
+        device/dtype handling.
+        """
+        backend = super()._get_backend(backend='auto')
+        return backend.xp
+    def _create_knots(self, x_col, n_splines, xp):
+        """
+        Create interior knots for a feature using quantiles.
+        Parameters
+        ----------
+        x_col : array, shape (n,)
+            Feature values.
+        n_splines : int
+            Number of basis functions.
+        xp : module
+            Array module.
+        Returns
+        -------
+        knots : array, shape (n_splines - degree - 1,)
+            Interior knots.
+        """
+        # Use quantiles for knot placement
+        # Exclude boundary knots (they'll be added by bspline_basis)
+        n_interior = n_splines - self.degree - 1
+        if n_interior <= 0:
+            raise ValueError(
+                f"n_splines ({n_splines}) must be greater than degree ({self.degree}) + 1"
+            )
+        # Use percentiles from 0 to 100, excluding boundaries
+        percentiles = np.linspace(0, 100, n_interior + 2)[1:-1]
+        # Convert to numpy for percentile computation
+        x_np = _to_numpy(x_col)
+        knots = np.percentile(x_np, percentiles)
+        # Remove duplicate knots (can happen with discrete data)
+        knots = np.unique(knots)
+        return xp_asarray(knots, dtype=xp.float64, xp=xp, ref_arr=x_col)
+    def _build_basis(self, X, xp):
+        """
+        Build combined basis matrix for all features.
+        Parameters
+        ----------
+        X : array, shape (n, p)
+            Input features.
+        xp : module
+            Array module.
+        Returns
+        -------
+        B : array, shape (n, 1 + sum(n_basis_j))
+            Combined basis matrix with intercept column.
+        penalty : array, shape (1 + sum(n_basis_j), 1 + sum(n_basis_j))
+            Block-diagonal penalty matrix (intercept not penalized).
+        """
+        n, p = X.shape
+        basis_blocks = []
+        penalty_blocks = []
+        total_basis = 0
+        for j in range(p):
+            x_col = X[:, j]
+            # Create knots for this feature
+            knots_j = self._create_knots(x_col, self.n_splines, xp)
+            self.knots_.append(knots_j)
+            # Store training boundary for prediction
+            self._boundary_lo_.append(float(xp.min(x_col)))
+            self._boundary_hi_.append(float(xp.max(x_col)))
+            # Build B-spline basis
+            B_j = bspline_basis(x_col, knots_j, degree=self.degree, xp=xp)
+            n_basis_j = B_j.shape[1]
+            # Build penalty matrix
+            S_j = difference_penalty(self.penalty_order, n_basis_j, xp)
+            basis_blocks.append(B_j)
+            penalty_blocks.append(S_j)
+            total_basis += n_basis_j
+        # Combine basis matrices: [1, B_1, B_2, ..., B_p]
+        intercept_col = xp_ones((n, 1), xp.float64, xp, X)
+        B = xp.hstack([intercept_col] + basis_blocks)
+        # Block-diagonal penalty with intercept dimension (not penalized)
+        # Size: (1 + total_basis, 1 + total_basis) to match B
+        full_size = 1 + total_basis
+        penalty = xp_zeros((full_size, full_size), xp.float64, xp, X)
+        offset = 1  # Skip intercept (row/col 0)
+        for S_j in penalty_blocks:
+            n_j = S_j.shape[0]
+            penalty[offset:offset + n_j, offset:offset + n_j] = S_j
+            offset += n_j
+        return B, penalty
+    def fit(self, X, y=None, **fit_params):
+        """
+        Fit the GAM model.
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+        y : array-like, shape (n_samples,)
+            Target values.
+        Returns
+        -------
+        self : GAM
+            Fitted model.
+        """
+        xp = self._get_xp()
+        # Convert to arrays on the correct device
+        # For torch backend, ensure arrays land on CUDA (not CPU)
+        _ref = None
+        if xp.__name__ == "torch":
+            import torch
+            _dev = getattr(self, 'device', None)
+            if _dev is not None and hasattr(_dev, 'value') and _dev.value in ('cuda', 'torch'):
+                _ref = torch.empty(0, device="cuda")
+            elif torch.cuda.is_available():
+                _ref = torch.empty(0, device="cuda")
+        X = xp_asarray(X, dtype=xp.float64, xp=xp, ref_arr=_ref)
+        y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=X).ravel()
+        n, p = X.shape
+        self.n_features_ = p
+        self.knots_ = []
+        self._boundary_lo_ = []
+        self._boundary_hi_ = []
+        # Build basis matrix and penalty
+        B, penalty = self._build_basis(X, xp)
+        # Center spline basis columns (not intercept) so the intercept
+        # captures the overall mean of y.  This makes the intercept
+        # identifiable even though spline basis can represent constants.
+        self._basis_mean_ = xp.mean(B[:, 1:], axis=0)
+        B_centered = xp_copy(B)
+        B_centered[:, 1:] = B[:, 1:] - self._basis_mean_
+        # Select smoothing parameter
+        if self.lam is None:
+            # Auto-select via GCV
+            best_lam, gcv_scores = select_lambda_gcv(
+                B_centered, y, penalty, xp=xp
+            )
+            self.lam_ = best_lam
+            self.gcv_score_ = float(xp.min(gcv_scores))
+        else:
+            self.lam_ = self.lam
+            self.gcv_score_ = None
+        # Fit the model with centered basis
+        beta, edf = penalized_ls(B_centered, y, penalty, self.lam_, xp)
+        # Store results
+        self.coef_ = beta
+        self.intercept_ = float(beta[0])
+        self.edf_ = float(edf) if not isinstance(edf, float) else edf
+        self._fitted = True
+        # Store training data info for prediction
+        self._xp = xp
+        self._xp_asarray_ref_ = X  # device reference for xp_asarray
+        return self
+    def predict(self, X):
+        """
+        Predict using the fitted GAM model.
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Input features.
+        Returns
+        -------
+        y_pred : array, shape (n_samples,)
+            Predicted values.
+        """
+        self._check_is_fitted()
+        # Re-resolve backend to handle device changes since fit()
+        xp = self._get_xp()
+        X = xp_asarray(X, dtype=xp.float64, xp=xp, ref_arr=self._xp_asarray_ref_)
+        n, p = X.shape
+        if p != self.n_features_:
+            raise ValueError(
+                f"X has {p} features, but model was fitted with {self.n_features_}"
+            )
+        # Build basis for prediction (use training boundaries to avoid
+        # "knots must be strictly within boundary" errors on small batches)
+        basis_blocks = []
+        for j in range(p):
+            x_col = X[:, j]
+            knots_j = self.knots_[j]
+            B_j = bspline_basis(
+                x_col, knots_j, degree=self.degree, xp=xp,
+                boundary_lo=self._boundary_lo_[j],
+                boundary_hi=self._boundary_hi_[j],
+            )
+            basis_blocks.append(B_j)
+        # Combine: [1, B_1, B_2, ..., B_p]
+        intercept_col = xp_ones((n, 1), xp.float64, xp, X)
+        B = xp.hstack([intercept_col] + basis_blocks)
+        # Apply same centering as in fit
+        B[:, 1:] = B[:, 1:] - self._basis_mean_
+        # Predict
+        y_pred = B @ self.coef_
+        return _to_numpy(y_pred)
+    def summary(self):
+        """
+        Print a summary of the fitted GAM model.
+        Returns
+        -------
+        summary_dict : dict
+            Dictionary containing model summary information.
+        """
+        self._check_is_fitted()
+        summary_dict = {
+            'n_features': self.n_features_,
+            'n_splines_per_feature': self.n_splines,
+            'spline_degree': self.degree,
+            'penalty_order': self.penalty_order,
+            'smoothing_parameter': self.lam_,
+            'effective_df': self.edf_,
+            'intercept': self.intercept_,
+        }
+        if self.gcv_score_ is not None:
+            summary_dict['gcv_score'] = self.gcv_score_
+        print("=" * 50)
+        print("GAM Model Summary")
+        print("=" * 50)
+        print(f"Number of features: {self.n_features_}")
+        print(f"B-splines per feature: {self.n_splines}")
+        print(f"Spline degree: {self.degree}")
+        print(f"Penalty order: {self.penalty_order}")
+        print(f"Smoothing parameter (lambda): {self.lam_:.6g}")
+        print(f"Effective degrees of freedom: {self.edf_:.2f}")
+        print(f"Intercept: {self.intercept_:.6f}")
+        if self.gcv_score_ is not None:
+            print(f"GCV score: {self.gcv_score_:.6f}")
+        print("=" * 50)
+        return summary_dict
+    def get_params(self, deep=True):
+        """Get parameters for this estimator."""
+        params = super().get_params(deep)
+        params.update({
+            'n_splines': self.n_splines,
+            'degree': self.degree,
+            'lam': self.lam,
+            'penalty_order': self.penalty_order,
+        })
+        return params
+    def set_params(self, **params):
+        """Set parameters for this estimator."""
+        for key, value in params.items():
+            if key in ('n_splines', 'degree', 'lam', 'penalty_order'):
+                setattr(self, key, value)
+            else:
+                super().set_params(**{key: value})
+        return self

statgpu/solvers/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Generic optimization solvers for penalized loss functions.
+These solvers work with any loss that implements the GLMLoss interface
+(value, gradient, fused_value_and_gradient, lipschitz, hessian, preprocess)
+and any penalty with a proximal operator.
+"""
+__all__ = [
+    "fista_solver",
+    "fista_bb_solver",
+    "fista_lla_path",
+    "newton_solver",
+    "lbfgs_solver",
+    "admm_solver",
+    "ConvergenceWarning",
+]
+from ._convergence import ConvergenceWarning
+from ._fista import fista_solver
+from ._fista_bb import fista_bb_solver
+from ._fista_lla import fista_lla_path
+from ._newton import newton_solver
+from ._lbfgs import lbfgs_solver
+from ._admm import admm_solver

statgpu/solvers/_admm.py ADDED Viewed

@@ -0,0 +1,241 @@
+"""ADMM solver for penalized GLM optimization.
+Reformulates min_w f(Xw; y) + p(w) as a consensus ADMM problem and solves
+via alternating direction method of multipliers. The w-update (smooth
+subproblem) uses either a direct Cholesky solve (for squared-error loss with
+moderate dimensionality) or Nesterov-accelerated gradient descent. The z-update
+reuses the penalty proximal operator and is element-wise / GPU-friendly.
+"""
+from __future__ import annotations
+import warnings
+import numpy as np
+from statgpu.backends import _resolve_backend
+from statgpu.backends._array_ops import (
+    _abs_sum_dev,
+    _copy_arr,
+    _device_leq,
+    _norm2_dev,
+    _sync_scalars,
+    _zeros,
+    _zeros_like,
+)
+from ._convergence import ConvergenceWarning
+from ._utils import (
+    _nesterov_momentum,
+    _validate_uniform_sample_weight,
+)
+__all__ = ["admm_solver"]
+def admm_solver(
+    loss: "GLMLoss",
+    penalty: "Penalty | None",
+    X,
+    y,
+    max_iter: int = 200,
+    tol: float = 1e-4,
+    rho: float = 1.0,
+    adaptive_rho: bool = True,
+    cg_max_iter: int = 30,
+    cg_tol: float = 1e-6,
+    init_coef=None,
+    sample_weight=None,
+) -> tuple:
+    """ADMM solver for penalized GLM optimization.
+    Reformulates min_w f(Xw; y) + p(w) as:
+        min_{w,z} f(Xw; y) + p(z)  s.t. w = z
+    and solves via the alternating direction method of multipliers:
+        w^{k+1} = argmin_w f(Xw; y) + (rho/2)||w - z^k + u^k||^2
+        z^{k+1} = prox_{p/rho}(w^{k+1} + u^k)
+        u^{k+1} = u^k + w^{k+1} - z^{k+1}
+    The w-update is a smooth, strongly convex problem solved via conjugate
+    gradient. The z-update reuses penalty.proximal(). Both are GPU-friendly:
+    w-update uses dense matmuls (cuBLAS), z-update is element-wise.
+    Supports numpy / cupy / torch backends via auto-detection of X.
+    Parameters
+    ----------
+    loss : GLMLoss
+    penalty : Penalty
+    X, y : arrays
+    max_iter : int
+        Maximum ADMM outer iterations.
+    tol : float
+        Convergence tolerance for primal/dual residuals.
+    rho : float
+        Augmented Lagrangian penalty parameter.
+    adaptive_rho : bool
+        Adapt rho based on primal/dual residual balance.
+    cg_max_iter : int
+        Maximum CG iterations for w-update subproblem.
+    cg_tol : float
+        CG convergence tolerance.
+    init_coef : array, optional
+        Initial coefficients.
+    sample_weight : array, optional
+    Returns
+    -------
+    coef : array, n_iter : int
+    """
+    backend = _resolve_backend("auto", X)
+    X_proc, y_proc = loss.preprocess(X, y)
+    n_features = X_proc.shape[1]
+    # Initialize
+    if init_coef is not None:
+        w = (
+            _copy_arr(init_coef)
+            if hasattr(init_coef, "copy") or hasattr(init_coef, "clone")
+            else np.array(init_coef).copy()
+        )
+    else:
+        w = _zeros(n_features, backend, ref_tensor=X)
+    z = _copy_arr(w)
+    u = _zeros_like(w)
+    if sample_weight is not None:
+        _validate_uniform_sample_weight(sample_weight, X_proc.shape[0], "admm_solver")
+    def _grad_w(w_vec, z_cur, u_cur):
+        """Gradient of f(w) + (rho/2)||w - z_cur + u_cur||^2 w.r.t. w."""
+        g = loss.gradient(X_proc, y_proc, w_vec, sample_weight=sample_weight)
+        g = g + rho * (w_vec - z_cur + u_cur)
+        return g
+    # Detect if loss supports Cholesky (constant Hessian, e.g. squared_error).
+    # For GLM losses, use Nesterov-accelerated gradient descent.
+    # When using Cholesky we pin rho (disable adaptive_rho) because the
+    # precomputed _A_mat = XtX/n + rho*I would become stale if rho changed.
+    use_cholesky = getattr(loss, '_supports_cholesky', False) and n_features <= 2000
+    if use_cholesky:
+        adaptive_rho = False
+    if use_cholesky:
+        _hess_const = loss.hessian(X_proc, y_proc, w)  # XtX / n
+        _A_mat = _hess_const
+        _cholesky_ok = False
+        if hasattr(_hess_const, 'shape'):
+            try:
+                if backend == "numpy":
+                    _A_mat = _hess_const + rho * np.eye(n_features, dtype=_hess_const.dtype)
+                    _L = np.linalg.cholesky(_A_mat)
+                elif backend == "cupy":
+                    import cupy as cp
+                    _A_mat = _hess_const + rho * cp.eye(n_features, dtype=_hess_const.dtype)
+                    _L = cp.linalg.cholesky(_A_mat)
+                else:
+                    import torch
+                    _A_mat = _hess_const + rho * torch.eye(n_features, dtype=_hess_const.dtype, device=_hess_const.device)
+                    _L = torch.linalg.cholesky(_A_mat)
+                _cholesky_ok = True
+            except (np.linalg.LinAlgError, ValueError, RuntimeError):
+                # Matrix not positive-definite (numerical issues, collinear features)
+                # Fall back to CG solver below
+                _cholesky_ok = False
+        if not _cholesky_ok:
+            use_cholesky = False
+        # Precompute -grad_f(0) = Xty/n for squared_error (the constant part)
+        _zero_coef = _zeros_like(w)
+        _neg_grad_zero = -loss.gradient(X_proc, y_proc, _zero_coef, sample_weight=sample_weight)  # Xty/n
+    else:
+        # Gradient descent step: 1/(L_f + rho)
+        L_f = loss.lipschitz(X_proc, w, y=y_proc)
+        if L_f <= 0:
+            L_f = 1.0
+        lr_sub = 1.0 / (L_f + rho + 1e-8)
+    iteration = -1  # default if max_iter=0
+    for iteration in range(max_iter):
+        z_old = _copy_arr(z)
+        # --- w-update ---
+        if use_cholesky:
+            # Closed-form: (XtX/n + rho*I) w = Xty/n + rho*(z - u)
+            # Use precomputed Cholesky factor for forward/back substitution
+            rhs = _neg_grad_zero + rho * (z - u)
+            if backend == "numpy":
+                from scipy.linalg import solve_triangular
+                tmp = solve_triangular(_L, rhs, lower=True)
+                w = solve_triangular(_L.T, tmp, lower=False)
+            elif backend == "cupy":
+                # Use triangular solve when available (O(n³/6) vs O(n³/3) for LU)
+                try:
+                    from cupyx.scipy.linalg import solve_triangular
+                    tmp = solve_triangular(_L, rhs, lower=True)
+                    w = solve_triangular(_L.T, tmp, lower=False)
+                except ImportError:
+                    tmp = cp.linalg.solve(_L, rhs)
+                    w = cp.linalg.solve(_L.T, tmp)
+            else:
+                tmp = torch.linalg.solve_triangular(_L, rhs.unsqueeze(1), upper=False)
+                w = torch.linalg.solve_triangular(_L.T, tmp, upper=True).squeeze(1)
+        else:
+            # Nesterov-accelerated gradient descent on the w-subproblem
+            w_new = _copy_arr(w)
+            w_mom = _copy_arr(w)
+            t_mom = 1.0
+            for _ in range(cg_max_iter):
+                w_old_mom = _copy_arr(w_new)
+                g_sub = _grad_w(w_mom, z, u)
+                w_next = w_mom - lr_sub * g_sub
+                beta_mom, t_mom = _nesterov_momentum(t_mom)
+                w_mom = w_next + beta_mom * (w_next - w_new)
+                w_new = w_next
+                diff_dev = _abs_sum_dev(w_next - w_old_mom)
+                if backend != "numpy":
+                    if _device_leq(diff_dev, cg_tol * n_features):
+                        break
+                elif diff_dev < cg_tol * n_features:
+                    break
+            w = w_new
+        # --- z-update: proximal operator ---
+        # Contract: proximal(z, step) = argmin_x step*P(x) + (1/2)||x - z||²
+        # ADMM z-update needs argmin_z P(z)/rho + (1/2)||z - (w+u)||²
+        #   = proximal(w + u, 1/rho)  with step = 1/rho
+        z = penalty.proximal(w + u, 1.0 / rho, backend=backend)
+        # --- u-update: dual ascent ---
+        u = u + w - z
+        # --- Adaptive rho + Convergence check (batched sync) ---
+        rp_dev = _norm2_dev(w - z)
+        rd_dev = _norm2_dev(z - z_old)
+        rp, rd_raw = _sync_scalars(rp_dev, rd_dev, backend=backend)
+        r_dual = rho * rd_raw
+        if adaptive_rho:
+            if rp > 10.0 * r_dual:
+                rho = min(rho * 2.0, 1e4)
+            elif r_dual > 10.0 * rp:
+                rho = max(rho * 0.5, 1e-4)
+            # Recompute step size to match updated rho
+            lr_sub = 1.0 / (L_f + rho + 1e-8)
+        if rp < tol and r_dual < tol:
+            break
+    # Return z (penalized/feasible variable), not w (unconstrained).
+    # At convergence w ≈ z, but z always satisfies the penalty structure.
+    n_iter = iteration + 1
+    if n_iter >= max_iter:
+        warnings.warn(
+            f"admm_solver did not converge within {max_iter} iterations "
+            f"(loss={getattr(loss, 'name', '?')}, penalty={getattr(penalty, 'name', '?')}).",
+            ConvergenceWarning,
+            stacklevel=2,
+        )
+    return z, n_iter

statgpu/solvers/_constants.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Solver convergence constants and thresholds."""
+_SLACK_TOLERANCE = 1e-14
+_DIVERGE_COEF_NORM_CAP = 100.0
+_DIVERGE_OBJ_RATIO = 100.0
+_DIVERGE_OBJ_ABS = 10.0
+_BB_RESTART_DOT_TOL = 1e-14
+_LIPSCHITZ_FLOOR = 1e-30
+_LIPSCHITZ_SAFETY_LOGISTIC_CV = 2.0
+# Gradient clipping thresholds (used by fista, fista_bb, fista_lla, _array_ops)
+# gmax = max(coef_norm * _GRAD_CLIP_COEF_FACTOR + _GRAD_CLIP_ABS_FLOOR, _GRAD_CLIP_MAX)
+# NOTE(review): as written, max(..., _GRAD_CLIP_MAX) makes _GRAD_CLIP_MAX a lower
+# bound on gmax (gmax >= 1e4), which contradicts its description below as the
+# "absolute maximum gradient cap" -- presumably min(...) is intended; confirm
+# against the solvers that consume these constants.
+_GRAD_CLIP_COEF_FACTOR = 10.0   # scales with coefficient magnitude
+_GRAD_CLIP_ABS_FLOOR = 1e3      # minimum gradient cap (prevents zero-cap at coef=0)
+_GRAD_CLIP_MAX = 1e4             # absolute maximum gradient cap

statgpu/solvers/_convergence.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Convergence warning for solvers."""
+class ConvergenceWarning(UserWarning):
+    """Solver did not converge within the iteration limit."""
+    pass