PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/linear_model/legacy/_solver_legacy.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""Legacy solver methods from _solver.py.
+DO NOT import in production code. Kept for reference only.
+"""
+from __future__ import annotations
+import numpy as np
+from statgpu.backends import _to_numpy
+from statgpu.backends._utils import _to_float_scalar
+from statgpu.backends._array_ops import _abs_sum_dev, _clip_grad_on_device, _copy_arr
+def fista_sqerr_adaptive_l1_fused(
+    X, y, penalty_weights, alpha,
+    XtX, Xty, yty, n_samples,
+    L_init, max_iter, tol,
+    backend, no_momentum=False,
+):
+    """Fused FISTA for squared-error loss with a weighted (adaptive) L1 penalty.
+    The gradient is built from the pre-computed Gram quantities as
+    ``(XtX @ y_k - Xty) / n_samples``; ``X``, ``y`` and ``yty`` are accepted
+    for call compatibility but are never read in this routine. The step size
+    is fixed at ``1 / L_init`` (no backtracking).
+    Parameters
+    ----------
+    X, y : array
+        Not read here.
+    penalty_weights : array (p,)
+        Per-coefficient L1 weights (LLA weights).
+    alpha : float
+        Penalty strength; the threshold vector is
+        ``alpha * penalty_weights / L_init``.
+    XtX, Xty : array
+        Pre-computed products used to form the gradient.
+    yty : any
+        Not read here.
+    n_samples : int
+        Divisor applied to the gradient.
+    L_init : float
+        Lipschitz constant used for the fixed step.
+    max_iter : int
+        Maximum number of iterations.
+    tol : float
+        The loop stops when ``_abs_sum_dev(coef - coef_old)`` falls below
+        ``tol`` on an iteration where convergence is checked.
+    backend : str
+        ``'torch'`` selects the torch path; any other value takes the cupy path.
+    no_momentum : bool
+        If True the momentum coefficient passed to the fused kernel is 0.
+    Returns
+    -------
+    coef : result of ``_to_numpy`` applied to the final coefficients
+    n_iter : int
+        Number of iterations executed (0 when ``max_iter`` is 0).
+    """
+    n_features = XtX.shape[0]
+    step = 1.0 / L_init
+    # NOTE(review): _get_sqerr_proximal_torch / _get_sqerr_proximal_cupy are not
+    # imported or defined in the visible part of this module -- confirm they
+    # are in scope before this legacy routine is called.
+    if backend == "torch":
+        import torch
+        thresh = torch.tensor(
+            alpha * penalty_weights * step,
+            device=XtX.device, dtype=XtX.dtype,
+        )
+        coef = torch.zeros(n_features, device=XtX.device, dtype=XtX.dtype)
+        coef_old = coef.clone()
+        y_k = coef.clone()
+        _fused = _get_sqerr_proximal_torch()
+    else:
+        import cupy as cp
+        # The cupy path always works in float64, independent of XtX's dtype.
+        thresh = cp.asarray(alpha * penalty_weights * step, dtype=cp.float64)
+        coef = cp.zeros(n_features, dtype=cp.float64)
+        coef_old = coef.copy()
+        y_k = coef.copy()
+        _fused = _get_sqerr_proximal_cupy()
+    t_k = 1.0
+    _clip_interval = 10  # gradient is passed through the clip helper every N-th iteration
+    _sync_interval = 10  # after the first 20 iterations, test convergence every N-th iteration
+    iteration = -1  # keeps the returned count at 0 when max_iter == 0
+    for iteration in range(max_iter):
+        grad = (XtX @ y_k - Xty) / n_samples
+        if iteration % _clip_interval == 0:
+            grad = _clip_grad_on_device(grad, coef_old, backend)
+        # The momentum coefficient is computed up front so the fused kernel
+        # receives it together with the proximal-step inputs.
+        if no_momentum:
+            beta_mom = 0.0
+        else:
+            t_new = (1.0 + np.sqrt(1.0 + 4.0 * t_k * t_k)) / 2.0
+            beta_mom = (t_k - 1.0) / t_new
+            t_k = t_new
+        coef, y_k = _fused(y_k, grad, step, thresh, coef_old, beta_mom)
+        # Converting to a Python float forces a host read, so it is only done
+        # on the early iterations and then at the sync interval.
+        if iteration < 20 or iteration % _sync_interval == 0:
+            coef_diff_dev = _abs_sum_dev(coef - coef_old)
+            if _to_float_scalar(coef_diff_dev) < tol:
+                break
+        coef_old = _copy_arr(coef)
+    return _to_numpy(coef), iteration + 1

statgpu/linear_model/penalized/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Penalized GLM models (split via mixin pattern)."""
+from ._base import PenalizedGeneralizedLinearModel, SelectivePenalty
+from ._penalized_linear import PenalizedLinearRegression
+from ._penalized_logistic import PenalizedLogisticRegression
+from ._penalized_poisson import PenalizedPoissonRegression
+from ._penalized_gamma import PenalizedGammaRegression
+from ._penalized_inverse_gaussian import PenalizedInverseGaussianRegression
+from ._penalized_negative_binomial import PenalizedNegativeBinomialRegression
+from ._penalized_tweedie import PenalizedTweedieRegression
+from ._penalized_cv import PenalizedGLM_CV, ApproximateCVWarning
+# Public names of this package; every entry is imported from a submodule above.
+__all__ = [
+    "PenalizedGeneralizedLinearModel",
+    "SelectivePenalty",
+    "PenalizedLinearRegression",
+    "PenalizedLogisticRegression",
+    "PenalizedPoissonRegression",
+    "PenalizedGammaRegression",
+    "PenalizedInverseGaussianRegression",
+    "PenalizedNegativeBinomialRegression",
+    "PenalizedTweedieRegression",
+    "PenalizedGLM_CV",
+    "ApproximateCVWarning",
+]

statgpu/linear_model/penalized/_base.py ADDED Viewed

@@ -0,0 +1,437 @@
+"""Core PenalizedGeneralizedLinearModel class and SelectivePenalty.
+This module contains the class definition, __init__, and core utility methods.
+Fit, inference, and predict methods live in separate mixin modules.
+"""
+from __future__ import annotations
+__all__ = ["PenalizedGeneralizedLinearModel", "SelectivePenalty"]
+from typing import Optional, Union, Dict, TYPE_CHECKING
+import numpy as np
+from statgpu._base import BaseEstimator
+from statgpu._config import Device
+from statgpu.cross_validation._base import INTERCEPT_CLIP_BOUND as _INTERCEPT_CLIP_BOUND
+from statgpu.linear_model._gaussian_inference import validate_cov_type, validate_hac_maxlags
+from statgpu.penalties._categories import NONSMOOTH as _NONSMOOTH_PENALTIES
+from ._fit_mixin import _PenalizedFitMixin
+from ._inference_mixin import _PenalizedInferenceMixin
+from ._predict_mixin import _PenalizedPredictMixin
+class SelectivePenalty:
+    """Penalty wrapper that penalizes only the first ``p`` coefficients.
+    The inner penalty sees ``coef[:p]``; the last entry (the intercept slot)
+    is never penalized and ``proximal`` only clips it to
+    ``[-_INTERCEPT_CLIP_BOUND, _INTERCEPT_CLIP_BOUND]``. The inner penalty,
+    feature count ``p`` and backend name are supplied via ``configure()``.
+    """
+    def __init__(self):
+        self._pen = None
+        self._p = 0
+        self._backend = "numpy"
+        self._alpha = 0.0
+        self._l1_ratio = 0.0
+        # Defined up front so reading ``name`` before configure() yields None
+        # instead of raising AttributeError.
+        self.name = None
+    def configure(self, pen, p, backend):
+        """Bind the inner penalty, the penalized-feature count and the backend name.
+        ``alpha`` and ``l1_ratio`` are read from ``pen`` when present (0.0
+        otherwise); ``pen.name`` is required.
+        """
+        self._pen = pen
+        self._p = p
+        self._backend = backend
+        self._alpha = float(getattr(pen, "alpha", 0.0))
+        self._l1_ratio = float(getattr(pen, "l1_ratio", 0.0))
+        self.name = pen.name
+    def _array_module(self, backend=None):
+        """Return cupy, torch or numpy for ``backend`` (default: configured backend)."""
+        chosen = backend or self._backend
+        if chosen == "cupy":
+            import cupy as cp
+            return cp
+        if chosen == "torch":
+            import torch
+            return torch
+        return np
+    def value(self, coef):
+        """Evaluate the inner penalty on ``coef[:p]``."""
+        return self._pen.value(coef[:self._p])
+    def proximal(self, w, step, backend=None):
+        """Apply the inner proximal operator to ``w[:p]`` and clip ``w[-1]``.
+        NOTE(review): the output buffer is allocated uninitialized and only
+        ``[:p]`` and ``[-1]`` are written, so this presumably expects
+        ``len(w) == p + 1`` -- confirm against callers.
+        """
+        b = backend or self._backend
+        xp = self._array_module(b)
+        result_feat = self._pen.proximal(w[:self._p], step, backend=b)
+        if b == "torch":
+            result = xp.empty(w.shape[0], dtype=w.dtype, device=w.device)
+            clipped_last = xp.clamp(w[-1], -_INTERCEPT_CLIP_BOUND, _INTERCEPT_CLIP_BOUND)
+        else:
+            result = xp.empty(w.shape[0], dtype=w.dtype)
+            clipped_last = xp.clip(w[-1], -_INTERCEPT_CLIP_BOUND, _INTERCEPT_CLIP_BOUND)
+        result[:self._p] = result_feat
+        result[-1] = clipped_last
+        return result
+    def _smooth_alpha(self):
+        """Return the coefficient of the quadratic (L2) part of the inner penalty.
+        Raises ValueError for any penalty other than L2 or ElasticNet.
+        """
+        pname = str(self._pen.name).lower()
+        if pname == "l2":
+            return self._alpha
+        if pname == "elasticnet":
+            return self._alpha * (1.0 - self._l1_ratio)
+        raise ValueError("smooth solvers only support L2/ElasticNet penalties.")
+    def smooth_value(self, coef):
+        """Return ``0.5 * smooth_alpha * sum(coef[:p] ** 2)``."""
+        sa = self._smooth_alpha()
+        active = coef[:self._p]
+        return 0.5 * sa * self._array_module().sum(active * active)
+    def smooth_gradient(self, coef):
+        """Return the gradient of ``smooth_value``; entries from index ``p`` on are zero."""
+        sa = self._smooth_alpha()
+        grad = self._array_module().zeros_like(coef)
+        grad[:self._p] = sa * coef[:self._p]
+        return grad
+    def smooth_hessian(self, coef):
+        """Return smooth penalty Hessian as a dense diagonal matrix.
+        WARNING: For p > ~1000, this allocates O(p^2) memory which may cause
+        OOM. Consider using the diagonal representation directly when available.
+        """
+        sa = self._smooth_alpha()
+        xp = self._array_module()
+        if self._backend == "torch":
+            diag = xp.zeros(coef.shape[0], dtype=coef.dtype, device=coef.device)
+        else:
+            diag = xp.zeros(coef.shape[0], dtype=coef.dtype)
+        diag[:self._p] = sa
+        return xp.diag(diag)
+class PenalizedGeneralizedLinearModel(
+    _PenalizedFitMixin,
+    _PenalizedInferenceMixin,
+    _PenalizedPredictMixin,
+    BaseEstimator,
+):
+    """
+    Penalized generalized linear model with pluggable GLM loss and penalty.
+    Minimizes: loss(X, y, w) + penalty(w)
+    Parameters
+    ----------
+    loss : str, default='squared_error'
+        Loss function: 'squared_error', 'logistic', 'poisson', 'gamma',
+        'negative_binomial', 'tweedie', 'inverse_gaussian'.
+    penalty : str or Penalty
+        Penalty type: 'l1', 'l2', 'elasticnet', 'scad', 'mcp', 'adaptive_l1',
+        'group_lasso', 'group_scad', 'group_mcp', or a Penalty instance.
+    solver : str, default='auto'
+        Solver: 'auto', 'fista', 'fista_bb', 'irls', 'newton', 'lbfgs', 'exact'.
+        'auto' selects the best path for the resolved backend and loss/penalty
+        combination (see _SOLVER_DISPATCH_TABLE).
+    alpha : float, default=1.0
+        Regularization strength.
+    l1_ratio : float, default=0.5
+        Only used when penalty='elasticnet'.
+    penalty_kwargs : dict, optional
+        Additional arguments passed to the penalty constructor.
+    fit_intercept : bool, default=True
+        Whether to calculate the intercept.
+    max_iter : int, default=1000
+        Maximum number of iterations.
+    tol : float, default=1e-4
+        Tolerance for convergence.
+    device : str or Device, default='auto'
+        Computation device: 'cpu', 'cuda', or 'auto'.
+    cpu_solver : str, default='fista'
+        CPU solver: 'fista', 'fista_bb', or 'coordinate_descent'.
+    lipschitz_L : float, optional
+        Pre-computed Lipschitz constant.
+    gpu_memory_cleanup : bool, default=False
+        If True, free GPU memory pool after fitting.
+    Examples
+    --------
+    # Lasso
+    >>> model = PenalizedLinearRegression(penalty='l1', alpha=0.1)
+    # Ridge
+    >>> model = PenalizedLinearRegression(penalty='l2', alpha=1.0)
+    # Elastic Net
+    >>> model = PenalizedLinearRegression(
+    ...     penalty='elasticnet', alpha=0.1, l1_ratio=0.5
+    ... )
+    """
+    def __init__(
+        self,
+        loss: str = "squared_error",
+        penalty: Union[str, "Penalty"] = "l1",
+        alpha: float = 1.0,
+        l1_ratio: float = 0.5,
+        penalty_kwargs: Optional[Dict] = None,
+        fit_intercept: bool = True,
+        max_iter: int = 1000,
+        tol: float = 1e-4,
+        device: Union[str, Device] = Device.AUTO,
+        n_jobs: Optional[int] = None,
+        cpu_solver: str = "fista",
+        solver: str = "auto",
+        lipschitz_L: Optional[float] = None,
+        gpu_memory_cleanup: bool = False,
+        compute_inference: bool = False,
+        inference_method: str = "debiased",
+        cov_type: str = "nonrobust",
+        hac_maxlags: Optional[int] = None,
+        stopping: str = "coef_delta",
+        lla: bool = True,
+        max_lla_iters: int = 50,
+        lla_tol: float = 1e-6,
+        loss_kwargs: Optional[Dict] = None,
+    ):
+        """Store the hyperparameters and initialise fitted/inference state.
+        ``cpu_solver``, ``solver``, ``inference_method`` and ``stopping`` are
+        stored lower-cased; ``cov_type`` and ``hac_maxlags`` are stored as
+        returned by ``validate_cov_type`` / ``validate_hac_maxlags``.
+        """
+        super().__init__(device=device, n_jobs=n_jobs)
+        def _lowercase_or_same(original, lowered):
+            # Hand back the caller's own object when it already equals its
+            # lower-cased form (keeps identity for sklearn clone()); otherwise
+            # use the lower-cased copy.
+            return original if original == lowered else lowered
+        # Public hyperparameters.
+        self.loss = loss
+        self.penalty = penalty
+        self.alpha = alpha
+        self.l1_ratio = l1_ratio
+        self.penalty_kwargs = penalty_kwargs or {}
+        self.fit_intercept = fit_intercept
+        self.max_iter = max_iter
+        self.tol = tol
+        self.cpu_solver = _lowercase_or_same(cpu_solver, cpu_solver.lower())
+        self.solver = _lowercase_or_same(solver, solver.lower())
+        self.lipschitz_L = lipschitz_L
+        self.gpu_memory_cleanup = gpu_memory_cleanup
+        self.compute_inference = compute_inference
+        self.inference_method = _lowercase_or_same(
+            inference_method, inference_method.lower()
+        )
+        self.cov_type = validate_cov_type(cov_type)
+        self.hac_maxlags = validate_hac_maxlags(hac_maxlags)
+        self.stopping = _lowercase_or_same(stopping, str(stopping).lower())
+        self.lla = lla
+        self.max_lla_iters = max_lla_iters
+        self.lla_tol = lla_tol
+        self.loss_kwargs = loss_kwargs or {}
+        # Internal state: resolved penalty object and private LLA settings.
+        self._penalty = None
+        self._lla_enabled = lla
+        self._max_lla_iters = max_lla_iters
+        self._lla_tol = lla_tol
+        self._lla_n_iters_ = 0
+        # Fitted results.
+        self.coef_ = None
+        self.intercept_ = None
+        self.n_iter_ = 0
+        # Design data, inference results and solver bookkeeping all start empty.
+        for _field in (
+            "_X_design", "_y", "_resid", "_scale", "_nobs", "_df_resid",
+            "_params", "_bse", "_tvalues", "_pvalues", "_conf_int",
+            "_inference_result", "_feature_names", "_design_info",
+            "_formula_has_intercept", "_selected_solver",
+            "_selected_backend_name", "_init_coef",
+        ):
+            setattr(self, _field, None)
+        self._inference_precomputed = False
+        self._precomputed_gaussian_state = None
+        # Simultaneous inference state.
+        self._conf_int_simultaneous = None
+        self._simultaneous_enabled = False
+        self._debiased_M_cpu = None
+        # Formula-derived override; None means "use fit_intercept".
+        self._use_intercept = None
+    @property
+    def _effective_intercept(self):
+        """Intercept flag in effect: ``_use_intercept`` when it is set, else ``fit_intercept``."""
+        override = self._use_intercept
+        return self.fit_intercept if override is None else override
+    def _resolve_penalty(self) -> "Penalty":
+        """Turn ``self.penalty`` (name or instance) into a Penalty object.
+        A Penalty instance is returned unchanged. The names "none", "null"
+        and "" map to an L2 penalty with ``alpha=0.0``. Any other name is
+        built with ``penalty_kwargs`` plus ``alpha`` (and ``l1_ratio`` for
+        "elasticnet"/"en").
+        """
+        # Imported here rather than at module level to avoid a circular dependency.
+        from statgpu.penalties import get_penalty, Penalty
+        spec = self.penalty
+        if isinstance(spec, Penalty):
+            return spec
+        pen_name = str(spec).lower().strip()
+        if pen_name in ("none", "null", ""):
+            return get_penalty("l2", alpha=0.0)
+        # Copy so the user's dict is not mutated; ``alpha`` always wins over
+        # any value supplied through penalty_kwargs.
+        ctor_kwargs = dict(self.penalty_kwargs)
+        ctor_kwargs["alpha"] = self.alpha
+        if pen_name in ("elasticnet", "en"):
+            ctor_kwargs["l1_ratio"] = self.l1_ratio
+        return get_penalty(pen_name, **ctor_kwargs)
+    def _resolve_loss(self):
+        """Build the loss object for ``self.loss`` via ``get_glm_loss``, forwarding ``self.loss_kwargs``."""
+        from statgpu.glm_core import get_glm_loss as _make_loss
+        options = self.loss_kwargs
+        return _make_loss(self.loss, **options)
+    def _validate_solver_penalty(self):
+        """Raise ValueError for solver/penalty pairs that cannot be combined.
+        - 'exact' requires squared-error loss with an L2 penalty.
+        - 'irls' requires an L2 penalty.
+        - 'newton' / 'lbfgs' reject any penalty listed in ``_NONSMOOTH_PENALTIES``.
+        The penalty name comes from the resolved ``_penalty`` object when it
+        has a ``name``, otherwise from ``self.penalty``.
+        """
+        chosen = self.solver
+        penalty_name = str(getattr(self._penalty, "name", self.penalty)).lower()
+        nonsmooth_names = _NONSMOOTH_PENALTIES
+        if chosen == "exact":
+            is_ridge_least_squares = (
+                self.loss == "squared_error" and penalty_name == "l2"
+            )
+            if not is_ridge_least_squares:
+                raise ValueError(
+                    "solver='exact' is only supported for squared-error L2/Ridge models."
+                )
+            return
+        if chosen == "irls" and penalty_name != "l2":
+            raise ValueError(
+                "solver='irls' only supports smooth L2 penalized GLM objectives."
+            )
+        if chosen in ("newton", "lbfgs") and penalty_name in nonsmooth_names:
+            raise ValueError(
+                f"solver='{chosen}' only supports smooth objectives; "
+                f"use solver='fista' for penalty='{penalty_name}'."
+            )
+    def _validate_inference_request(self):
+        """Raise NotImplementedError when the requested inference path is unsupported.
+        Nothing is checked unless ``compute_inference`` is true. Accepted:
+        - squared_error loss with an L2 penalty;
+        - an L1/ElasticNet penalty whose ``inference_method`` contains
+          'debiased', 'cpu_ols', 'gpu_ols' or 'bootstrap' (the loss is not
+          examined on this branch).
+        """
+        if not self.compute_inference:
+            return
+        penalty_name = str(getattr(self._penalty, "name", self.penalty)).lower()
+        if self.loss == "squared_error" and penalty_name == "l2":
+            return
+        method = str(getattr(self, "inference_method", "debiased")).lower()
+        accepted_tags = ("debiased", "cpu_ols", "gpu_ols", "bootstrap")
+        is_sparse_penalty = penalty_name in ("l1", "elasticnet", "en")
+        if is_sparse_penalty and any(tag in method for tag in accepted_tags):
+            return
+        raise NotImplementedError(
+            f"compute_inference=True with penalty='{penalty_name}' and "
+            f"loss='{self.loss}' is not supported. Use inference_method='debiased', "
+            f"'cpu_ols_inference', or 'bootstrap' for L1/ElasticNet, "
+            f"or compute_inference=False to skip inference."
+        )
+    def _clear_inference_state(self):
+        """Reset the stored design data and inference results to ``None``."""
+        for field in (
+            "_X_design", "_y", "_resid", "_scale", "_nobs", "_df_resid",
+            "_params", "_bse", "_tvalues", "_pvalues", "_conf_int",
+            "_inference_result",
+        ):
+            setattr(self, field, None)
+    def _family_for_loss(self):
+        """Return the GLM family object for ``self.loss``, cached per configuration.
+        The family is stored in ``_family_cache`` together with a
+        ``_family_cache_key`` of ``(loss, parameter)``. The cached object is
+        reused only while that key still matches, so changing ``loss`` (or the
+        negative-binomial ``alpha`` / Tweedie ``power``) after a first call no
+        longer returns a stale family. Unrecognised loss names map to Gaussian.
+        For 'negative_binomial' and 'tweedie' the parameter is read from the
+        ``_loss`` object when it has the attribute, otherwise from
+        ``loss_kwargs`` (defaults 1.0 and 1.5).
+        """
+        loss_name = self.loss
+        loss_obj = getattr(self, "_loss", None)
+        if loss_name == "negative_binomial":
+            shape_param = getattr(
+                loss_obj, "alpha",
+                getattr(self, "loss_kwargs", {}).get("alpha", 1.0),
+            )
+        elif loss_name == "tweedie":
+            shape_param = getattr(
+                loss_obj, "power",
+                getattr(self, "loss_kwargs", {}).get("power", 1.5),
+            )
+        else:
+            shape_param = None
+        # assumes the parameter is a plain scalar so tuple equality is well
+        # defined -- TODO confirm no caller passes an array here
+        cache_key = (loss_name, shape_param)
+        cached = getattr(self, "_family_cache", None)
+        if cached is not None and getattr(self, "_family_cache_key", None) == cache_key:
+            return cached
+        from statgpu.glm_core._family import (
+            Binomial,
+            Gaussian,
+            Poisson,
+            Gamma,
+            InverseGaussian,
+            NegativeBinomial,
+            Tweedie,
+        )
+        if loss_name == "logistic":
+            fam = Binomial()
+        elif loss_name == "poisson":
+            fam = Poisson()
+        elif loss_name == "gamma":
+            fam = Gamma()
+        elif loss_name == "inverse_gaussian":
+            fam = InverseGaussian()
+        elif loss_name == "negative_binomial":
+            fam = NegativeBinomial(alpha=shape_param)
+        elif loss_name == "tweedie":
+            fam = Tweedie(power=shape_param)
+        else:
+            fam = Gaussian()
+        self._family_cache = fam
+        self._family_cache_key = cache_key
+        return fam
+    def _column_stack(self, arrays, backend_name):
+        """Column-stack ``arrays`` with cupy, torch or (by default) numpy, chosen by ``backend_name``."""
+        if backend_name == "cupy":
+            import cupy as xp
+        elif backend_name == "torch":
+            import torch as xp
+        else:
+            xp = np
+        return xp.column_stack(arrays)
+    def _ones(self, n, backend_name, ref):
+        """Return a length-``n`` vector of ones in the given backend.
+        The dtype (and, for torch, the device) is taken from ``ref``; on the
+        numpy path a ``ref`` without ``dtype`` falls back to float64.
+        """
+        if backend_name == "torch":
+            import torch
+            return torch.ones(n, dtype=ref.dtype, device=ref.device)
+        if backend_name == "cupy":
+            import cupy as cp
+            return cp.ones(n, dtype=ref.dtype)
+        dtype = getattr(ref, "dtype", np.float64)
+        return np.ones(n, dtype=dtype)
+    def _selective_penalty(self, p, backend_name):
+        """Build a SelectivePenalty around ``self._penalty`` for ``p`` penalized features.
+        A new wrapper is constructed and configured on every call; nothing is
+        cached or shared between calls.
+        """
+        wrapper = SelectivePenalty()
+        wrapper.configure(self._penalty, p, backend_name)
+        return wrapper