PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/panel/_utils.py ADDED Viewed

@@ -0,0 +1,482 @@
+"""
+Panel data utility functions.
+Provides demeaning / within-transformation routines used by fixed effects
+and random effects estimators.  All functions accept an ``xp`` module
+(numpy / cupy / torch) so they work on any backend.
+Performance note: all group-level operations use scatter-add to compute
+group sums and counts in a single kernel launch, avoiding per-group
+Python loops and their associated GPU-CPU synchronization overhead.
+"""
+from __future__ import annotations
+# Public names for ``from statgpu.panel._utils import *``.  Every entry must
+# be defined in this module; the previous entry "demean" was not (the
+# function is ``demean_variables``), which made a star-import raise
+# AttributeError.
+__all__ = ["demean_variables", "within_transform", "group_means"]
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+import numpy as np
+from statgpu.backends import xp_asarray, xp_copy, xp_ones, xp_zeros, _to_float_scalar, _to_numpy
+@dataclass
+class PanelSummary:
+    """Container for the quantities reported by a fitted panel model.
+    ``str(summary)`` renders a fixed-width text table and
+    :meth:`to_dict` returns the same information as a plain dictionary.
+    Attributes
+    ----------
+    model_type : str
+        Model label shown in the table title, e.g. ``'PanelOLS'`` or
+        ``'RandomEffects'``.
+    nobs : int
+        Number of observations.
+    df_resid : int
+        Residual degrees of freedom.
+    coef : ndarray, shape (k,)
+        Estimated coefficients.
+    bse : ndarray, shape (k,)
+        Standard errors.
+    tvalues : ndarray, shape (k,)
+        t-statistics.
+    pvalues : ndarray, shape (k,)
+        Two-sided p-values.
+    conf_int : ndarray, shape (k, 2)
+        Lower / upper confidence bounds, one row per coefficient.
+    feature_names : list of str
+        Row labels for the coefficient table.
+    rsquared_within : float or None
+        Within R-squared; printed only when not ``None``.
+    cov_type : str or None
+        Covariance type; printed only when not ``None``.
+    entity_effects : bool or None
+        Whether entity effects were included; printed only when not ``None``.
+    time_effects : bool or None
+        Whether time effects were included; printed only when not ``None``.
+    variance_components : dict or None
+        Mapping with keys ``'sigma2_e'`` and ``'sigma2_a'``; printed only
+        when not ``None``.
+    theta : float or None
+        GLS transformation parameter; printed only when not ``None``.
+    alpha : float
+        Significance level used to label the confidence-interval columns.
+    extra : dict
+        Additional model-specific metadata (not rendered and not included
+        in :meth:`to_dict`).
+    """
+    model_type: str
+    nobs: int
+    df_resid: int
+    coef: np.ndarray
+    bse: np.ndarray
+    tvalues: np.ndarray
+    pvalues: np.ndarray
+    conf_int: np.ndarray
+    feature_names: List[str]
+    rsquared_within: Optional[float] = None
+    cov_type: Optional[str] = None
+    entity_effects: Optional[bool] = None
+    time_effects: Optional[bool] = None
+    variance_components: Optional[Dict[str, float]] = None
+    theta: Optional[float] = None
+    alpha: float = 0.05
+    extra: Dict = field(default_factory=dict)
+    def __str__(self) -> str:
+        """Render the summary as a 72-column text table."""
+        heavy_rule = "=" * 72
+        light_rule = "-" * 72
+        rows = [heavy_rule, f"{'':>20}{self.model_type} Results", heavy_rule]
+        # Optional header entries appear only when the field is set.
+        if self.entity_effects is not None:
+            rows.append(f"Entity effects:     {str(self.entity_effects):>10}")
+        if self.time_effects is not None:
+            rows.append(f"Time effects:       {str(self.time_effects):>10}")
+        if self.cov_type is not None:
+            rows.append(f"Covariance type:    {self.cov_type:>10}")
+        rows.append(f"No. Observations:   {self.nobs:>10}")
+        rows.append(f"Degrees of Freedom: {self.df_resid:>10}")
+        if self.rsquared_within is not None:
+            rows.append(f"Within R-squared:   {self.rsquared_within:>10.4f}")
+        components = self.variance_components
+        if components is not None:
+            rows.append(f"sigma2_e:           {components['sigma2_e']:>10.6f}")
+            rows.append(f"sigma2_a:           {components['sigma2_a']:>10.6f}")
+        if self.theta is not None:
+            rows.append(f"theta (avg):        {self.theta:>10.4f}")
+        # Column labels for the interval bounds; the default level uses
+        # fixed labels, any other level is formatted from ``alpha``.
+        if self.alpha != 0.05:
+            lo_label = f"[{self.alpha/2:.3f}"
+            hi_label = f"{1-self.alpha/2:.3f}]"
+        else:
+            lo_label = "[0.025"
+            hi_label = "0.975]"
+        rows.append(light_rule)
+        rows.append(f"{'':<12} {'coef':>10} {'std err':>10} {'t':>8} {'P>|t|':>10} {lo_label:>10} {hi_label:>10}")
+        rows.append(light_rule)
+        # One table row per feature, indexed into the per-coefficient arrays.
+        rows.extend(
+            f"{name:<12} {self.coef[i]:>10.4f} {self.bse[i]:>10.4f} "
+            f"{self.tvalues[i]:>8.3f} {self.pvalues[i]:>10.4f} "
+            f"{self.conf_int[i, 0]:>10.4f} {self.conf_int[i, 1]:>10.4f}"
+            for i, name in enumerate(self.feature_names)
+        )
+        rows.append(heavy_rule)
+        return "\n".join(rows)
+    def to_dict(self) -> Dict:
+        """Return the summary as a dictionary with arrays converted to lists.
+        The ``extra`` field is not part of the returned mapping.
+        """
+        payload = {
+            'model_type': self.model_type,
+            'nobs': self.nobs,
+            'df_resid': self.df_resid,
+        }
+        # Array-valued fields are converted with ``.tolist()``.
+        for key in ('coef', 'bse', 'tvalues', 'pvalues', 'conf_int'):
+            payload[key] = getattr(self, key).tolist()
+        # Remaining fields are passed through unchanged.
+        for key in ('feature_names', 'rsquared_within', 'cov_type',
+                    'entity_effects', 'time_effects',
+                    'variance_components', 'theta', 'alpha'):
+            payload[key] = getattr(self, key)
+        return payload
+def _scatter_add(xp, indices, values, n_groups):
+    """Sum ``values`` into ``n_groups`` bins selected by ``indices``.
+    Returns a 1-D array ``out`` of length ``n_groups`` with
+    ``out[j] = sum(values[indices == j])``.  The implementation is chosen
+    from the array module ``xp``:
+    * a module exposing ``scatter_add`` (the PyTorch case) uses the
+      in-place ``Tensor.scatter_add_`` on a zero tensor created on
+      ``values.device``;
+    * a module named ``'cupy'`` uses ``cupyx.scatter_add`` and, if that
+      import fails, accumulates on the host with ``np.add.at`` and converts
+      the result back with ``xp.asarray``;
+    * anything else is treated as NumPy and uses ``np.add.at``.
+    """
+    if hasattr(xp, 'scatter_add'):
+        # PyTorch: Tensor.scatter_add_(dim, index, src) needs int64 indices.
+        acc = xp.zeros(n_groups, dtype=values.dtype, device=values.device)
+        acc.scatter_add_(0, indices.long(), values)
+        return acc
+    is_cupy = hasattr(xp, 'add') and hasattr(xp, 'zeros') and xp.__name__ == 'cupy'
+    if not is_cupy:
+        # NumPy: unbuffered in-place accumulation.
+        host_acc = np.zeros(n_groups, dtype=values.dtype)
+        np.add.at(host_acc, _to_numpy(indices), _to_numpy(values))
+        return host_acc
+    try:
+        acc = xp.zeros(n_groups, dtype=values.dtype)
+        from cupyx import scatter_add as _cupyx_scatter_add
+        _cupyx_scatter_add(acc, indices, values)
+        return acc
+    except ImportError:
+        # cupyx unavailable: accumulate on the host, then hand back to xp.
+        host_acc = np.zeros(n_groups, dtype=values.dtype)
+        np.add.at(host_acc, _to_numpy(indices), _to_numpy(values))
+        return xp.asarray(host_acc)
+def _remap_to_contiguous(groups, xp):
+    """Translate arbitrary group labels into indices ``0..n_groups-1``.
+    The labels are converted to a flat NumPy array and passed through
+    ``np.unique(..., return_inverse=True)``; the inverse mapping is then
+    converted back to an ``xp`` int64 array using ``groups`` as the
+    reference array.
+    Returns
+    -------
+    indices : array, shape (n,)
+        ``indices[i]`` is the position of ``groups[i]`` in ``unique_labels``.
+    n_groups : int
+        Number of distinct labels.
+    unique_labels : ndarray
+        Sorted distinct labels (NumPy array).
+    """
+    flat_labels = _to_numpy(groups).ravel()
+    unique_labels, inverse = np.unique(flat_labels, return_inverse=True)
+    indices = xp_asarray(inverse, dtype=xp.int64, xp=xp, ref_arr=groups)
+    return indices, len(unique_labels), unique_labels
+def within_transform(y, groups, xp=None):
+    """Remove group means (fixed-effect projection).
+    Computes ``y_within[i] = y[i] - mean(y[groups == groups[i]])`` for every
+    observation.  Group sums and counts are obtained with two
+    ``_scatter_add`` calls rather than a per-group Python loop.
+    Parameters
+    ----------
+    y : array-like, shape (n,)
+        Outcome vector; converted to float64 and flattened.
+    groups : array-like, shape (n,)
+        Group labels.
+    xp : module, optional
+        Array module (numpy / cupy / torch).  Defaults to numpy.
+    Returns
+    -------
+    y_within : array, shape (n,)
+        Demeaned outcome.
+    """
+    if xp is None:
+        xp = np
+    y = xp_asarray(y, dtype=xp.float64, xp=xp).ravel()
+    groups = xp_asarray(groups, xp=xp, ref_arr=y).ravel()
+    # Remap labels to contiguous indices (goes through NumPy for np.unique).
+    idx, n_groups, _ = _remap_to_contiguous(groups, xp)
+    group_sums = _scatter_add(xp, idx, y, n_groups)
+    group_counts = _scatter_add(xp, idx, xp.ones_like(y), n_groups)
+    # Every index 0..n_groups-1 comes from the inverse mapping of np.unique,
+    # so each group has at least one member and the counts are never zero.
+    # Dividing directly avoids ``xp.maximum(counts, 1.0)``, which raises on
+    # PyTorch because torch.maximum does not accept a Python scalar.
+    means = group_sums / group_counts
+    # Broadcast each group's mean back to its observations.
+    return y - means[idx]
+def make_group_dummies(groups, xp=None):
+    """Create a dummy (one-hot) matrix from group labels.
+    Parameters
+    ----------
+    groups : array-like, shape (n,)
+        Group labels.
+    xp : module, optional
+        Array module.  Defaults to numpy.
+    Returns
+    -------
+    D : array, shape (n, n_groups)
+        float64 matrix with ``D[i, j] = 1`` when observation *i* belongs to
+        the *j*-th distinct label (in ``np.unique`` order) and 0 otherwise.
+    """
+    if xp is None:
+        xp = np
+    groups = xp_asarray(groups, xp=xp).ravel()
+    n = len(groups)
+    idx, n_groups, _ = _remap_to_contiguous(groups, xp)
+    D = xp_zeros((n, n_groups), xp.float64, xp, groups)
+    # Row indices are built on the host and converted with the same helper
+    # call used for ``idx`` in _remap_to_contiguous.  Calling
+    # ``xp.arange(n, device=...)`` is not portable: NumPy < 2.0 and CuPy
+    # reject the ``device`` keyword.
+    row_idx = xp_asarray(np.arange(n), dtype=xp.int64, xp=xp, ref_arr=groups)
+    # Advanced indexing sets one entry per row (no per-group loop).
+    D[row_idx, idx] = 1.0
+    return D
+def _within_transform_matrix(M, groups, xp):
+    """Remove group means from each column of matrix M.
+    Group counts are computed once and reused; group sums are then
+    computed with one ``_scatter_add`` call per column (a loop over the
+    ``k`` columns, not over groups).  The input is not modified.
+    Parameters
+    ----------
+    M : array, shape (n, k)
+        Input matrix.
+    groups : array, shape (n,)
+        Group labels.
+    xp : module
+        Array module.
+    Returns
+    -------
+    M_within : array, shape (n, k)
+        Column-demeaned copy of ``M``.
+    """
+    n, k = M.shape
+    idx, n_groups, _ = _remap_to_contiguous(groups, xp)
+    ones_col = xp_ones(n, M.dtype, xp, M)
+    group_counts = _scatter_add(xp, idx, ones_col, n_groups)
+    # Each remapped group has at least one member, so counts are >= 1 and no
+    # zero-guard is needed.  The former ``xp.maximum(counts, 1.0)`` raised on
+    # PyTorch, where torch.maximum requires a Tensor second argument.
+    inv_counts = 1.0 / group_counts
+    # NumPy/CuPy arrays expose ``copy``; torch tensors expose ``clone``.
+    result = M.copy() if hasattr(M, 'copy') else M.clone()
+    for j in range(k):
+        col = M[:, j]
+        group_sums_j = _scatter_add(xp, idx, col, n_groups)
+        group_means_j = group_sums_j * inv_counts
+        result[:, j] = col - group_means_j[idx]
+    return result
+def demean_variables(y, X, entity_ids, time_ids=None, xp=None,
+                     max_iter=100, tol=1e-10):
+    """Demean *y* and *X* for fixed-effects estimation.
+    With only *entity_ids*, a single entity within-transformation is
+    applied.  If *time_ids* is also provided, entity and time demeaning are
+    alternated (alternating projections) until the largest absolute change
+    in *y* and *X* between two sweeps drops below *tol* or *max_iter*
+    sweeps have run.  For balanced panels convergence occurs in one pass.
+    NOTE(review): the original docstring attributes the method to Mundlak
+    (1978); confirm the reference.
+    Parameters
+    ----------
+    y : array-like, shape (n,)
+        Outcome vector.
+    X : array-like, shape (n, k)
+        Regressor matrix; a 1-D input is treated as a single column.
+    entity_ids : array-like, shape (n,) or None
+        Entity (individual) identifiers.  ``None`` skips entity demeaning
+        (time-only fixed effects).
+    time_ids : array-like, shape (n,), optional
+        Time-period identifiers.  If provided, time demeaning is applied.
+    xp : module, optional
+        Array module.  Defaults to numpy.
+    max_iter : int, default=100
+        Maximum number of alternating-projection sweeps.
+    tol : float, default=1e-10
+        Convergence tolerance (max absolute change over *y* and *X*).
+    Returns
+    -------
+    y_d : array, shape (n,)
+        Demeaned outcome.
+    X_d : array, shape (n, k)
+        Demeaned regressors.
+    """
+    if xp is None:
+        xp = np
+    X = xp_asarray(X, dtype=xp.float64, xp=xp)
+    if X.ndim == 1:
+        X = X.reshape(-1, 1)
+    y_d = xp_asarray(y, dtype=xp.float64, xp=xp).ravel()
+    X_d = X.copy() if hasattr(X, 'copy') else X.clone() if hasattr(X, 'clone') else X - 0.0
+    # Entity demeaning (skipped when entity_ids is None, e.g. time-only FE).
+    if entity_ids is not None:
+        y_d = within_transform(y_d, entity_ids, xp)
+        X_d = _within_transform_matrix(X_d, entity_ids, xp)
+    if time_ids is not None:
+        for _ in range(max_iter):
+            # Both transforms return new arrays, so holding references to the
+            # previous iterates is enough; no copies are needed.
+            y_prev, X_prev = y_d, X_d
+            if entity_ids is not None:
+                y_d = within_transform(y_d, entity_ids, xp)
+                X_d = _within_transform_matrix(X_d, entity_ids, xp)
+            y_d = within_transform(y_d, time_ids, xp)
+            X_d = _within_transform_matrix(X_d, time_ids, xp)
+            # Convergence is judged on y AND X: checking y alone can stop
+            # the loop while columns of X are still changing.
+            max_change = _to_float_scalar(xp.max(xp.abs(y_d - y_prev)))
+            if X_d.shape[1] > 0:
+                x_change = _to_float_scalar(xp.max(xp.abs(X_d - X_prev)))
+                max_change = max(max_change, x_change)
+            if max_change < tol:
+                break
+    return y_d, X_d
+def group_means(y, groups, xp=None):
+    """Compute group-level means aligned to each observation.
+    Returns an array of shape (n,) where element *i* is the mean of *y*
+    over all observations belonging to the same group as observation *i*.
+    Group sums and counts are obtained with two ``_scatter_add`` calls.
+    Parameters
+    ----------
+    y : array-like, shape (n,)
+        Outcome vector; converted to float64 and flattened.
+    groups : array-like, shape (n,)
+        Group labels.
+    xp : module, optional
+        Array module.  Defaults to numpy.
+    Returns
+    -------
+    y_bar : array, shape (n,)
+        Group means aligned to each observation.
+    """
+    if xp is None:
+        xp = np
+    y = xp_asarray(y, dtype=xp.float64, xp=xp).ravel()
+    groups = xp_asarray(groups, xp=xp, ref_arr=y).ravel()
+    idx, n_groups, _ = _remap_to_contiguous(groups, xp)
+    group_sums = _scatter_add(xp, idx, y, n_groups)
+    group_counts = _scatter_add(xp, idx, xp.ones_like(y), n_groups)
+    # Counts are >= 1 for every remapped group, so plain division is safe.
+    # ``xp.maximum(counts, 1.0)`` was dropped because torch.maximum rejects
+    # a Python scalar as its second argument.
+    means = group_sums / group_counts
+    return means[idx]
+def group_sizes(groups, xp=None):
+    """Return, for every observation, the size of the group it belongs to.
+    A vector of float64 ones is scatter-added by group index to obtain the
+    per-group counts, which are then gathered back per observation.
+    Parameters
+    ----------
+    groups : array-like, shape (n,)
+        Group labels.
+    xp : module, optional
+        Array module.  Defaults to numpy.
+    Returns
+    -------
+    T_i : array, shape (n,)
+        Group size for each observation (float64 counts).
+    """
+    xp = np if xp is None else xp
+    labels = xp_asarray(groups, xp=xp).ravel()
+    idx, n_groups, _ = _remap_to_contiguous(labels, xp)
+    unit_weights = xp_ones(len(labels), xp.float64, xp, labels)
+    per_group = _scatter_add(xp, idx, unit_weights, n_groups)
+    return per_group[idx]
+def ols_inference_nonrobust(params, X, scale, df, alpha=0.05):
+    """Compute non-robust OLS inference (SE, t, p, CI).
+    The coefficient covariance is ``scale * (X'X)^{-1}``; the pseudo-inverse
+    is used when ``np.linalg.inv`` raises ``LinAlgError``.
+    Parameters
+    ----------
+    params : ndarray, shape (k,)
+        Estimated coefficients.
+    X : ndarray, shape (n, k)
+        Design matrix (numpy).
+    scale : float
+        Residual variance (RSS / df).
+    df : int
+        Residual degrees of freedom.
+    alpha : float
+        Significance level for confidence intervals.
+    Returns
+    -------
+    bse : ndarray, shape (k,)
+        Standard errors.
+    tvalues : ndarray, shape (k,)
+        t-statistics.
+    pvalues : ndarray, shape (k,)
+        Two-sided p-values from the Student t distribution with ``df``
+        degrees of freedom.
+    conf_int : ndarray, shape (k, 2)
+        ``1 - alpha`` confidence bounds (lower, upper).
+    """
+    from scipy import stats
+    XtX = X.T @ X
+    try:
+        XtX_inv = np.linalg.inv(XtX)
+    except np.linalg.LinAlgError:
+        XtX_inv = np.linalg.pinv(XtX)
+    cov_params = scale * XtX_inv
+    bse = np.sqrt(np.diag(cov_params))
+    # Floor the denominator at the smallest positive normal float so that a
+    # zero standard error does not trigger a division by zero.
+    _eps = np.finfo(np.float64).tiny
+    tvalues = params / np.maximum(bse, _eps)
+    # Use the survival function rather than ``1 - cdf``: the subtraction
+    # rounds to exactly 0 once the tail probability falls below float64
+    # resolution, whereas ``sf`` keeps the small p-value.
+    pvalues = 2 * stats.t.sf(np.abs(tvalues), df)
+    t_crit = stats.t.ppf(1 - alpha / 2, df)
+    conf_int = np.column_stack([
+        params - t_crit * bse,
+        params + t_crit * bse,
+    ])
+    return bse, tvalues, pvalues, conf_int

statgpu/penalties/__init__.py ADDED Viewed

@@ -0,0 +1,139 @@
+"""
+Penalty function registry for statgpu.
+Usage:
+    from statgpu.penalties import get_penalty, register_penalty
+    # Built-in
+    pen = get_penalty('l1', alpha=0.1)
+    # Custom
+    @register_penalty('custom')
+    class CustomPenalty(Penalty):
+        ...
+"""
+from ._base import Penalty, CompositePenalty
+from ._l1 import L1Penalty
+from ._l2 import L2Penalty
+from ._elasticnet import ElasticNetPenalty
+from ._scad import SCADPenalty
+from ._mcp import MCPPenalty
+from ._adaptive_l1 import AdaptiveL1Penalty
+from ._group_lasso import GroupLassoPenalty, AdaptiveGroupLassoPenalty
+from ._group_mcp import GroupMCPPenalty
+from ._group_scad import GroupSCADPenalty
+def _torch_compile_ok():
+    """Heuristic check for whether ``torch.compile`` should be used.
+    Returns ``True`` when torch imports and either CUDA is unavailable or
+    the current device's major compute capability is at least 7; returns
+    ``False`` if importing torch or querying the device raises.
+    """
+    try:
+        import torch
+        if not torch.cuda.is_available():
+            return True  # CPU-only torch can compile
+        major = torch.cuda.get_device_capability()[0]
+        return major >= 7
+    except Exception:
+        return False
+# Names exported by ``from statgpu.penalties import *``: the penalty classes
+# imported above followed by the three registry helpers defined below.
+__all__ = [
+    "Penalty",
+    "CompositePenalty",
+    "L1Penalty",
+    "L2Penalty",
+    "ElasticNetPenalty",
+    "SCADPenalty",
+    "MCPPenalty",
+    "AdaptiveL1Penalty",
+    "GroupLassoPenalty",
+    "AdaptiveGroupLassoPenalty",
+    "GroupMCPPenalty",
+    "GroupSCADPenalty",
+    "get_penalty",
+    "register_penalty",
+    "list_penalties",
+]
+# Maps each lookup name accepted by ``get_penalty`` to its penalty class.
+# Several keys are aliases of the same class (for example "l2",
+# "l2_squared" and "ridge" all map to L2Penalty).  ``register_penalty``
+# adds to, or overwrites, entries of this dict at runtime.
+_PENALTY_REGISTRY = {
+    "l1": L1Penalty,
+    "l2": L2Penalty,
+    "l2_squared": L2Penalty,
+    "ridge": L2Penalty,
+    "elasticnet": ElasticNetPenalty,
+    "en": ElasticNetPenalty,
+    "scad": SCADPenalty,
+    "mcp": MCPPenalty,
+    "adaptive_l1": AdaptiveL1Penalty,
+    "adaptive_lasso": AdaptiveL1Penalty,
+    "group_lasso": GroupLassoPenalty,
+    "gl": GroupLassoPenalty,
+    "group_mcp": GroupMCPPenalty,
+    "gmcp": GroupMCPPenalty,
+    "group_scad": GroupSCADPenalty,
+    "gscad": GroupSCADPenalty,
+}
+def get_penalty(name: str, **kwargs) -> Penalty:
+    """
+    Instantiate the penalty registered under ``name``.
+    Parameters
+    ----------
+    name : str
+        Registry key, matched exactly (e.g. 'l1', 'ridge', 'elasticnet',
+        'scad', 'mcp', 'group_lasso').  Call ``list_penalties()`` for the
+        full set of currently registered names, including aliases.
+    **kwargs
+        Forwarded unchanged to the penalty class constructor.
+    Returns
+    -------
+    Penalty
+        Newly constructed penalty object.
+    Raises
+    ------
+    ValueError
+        If ``name`` is not a key of the registry; the message lists the
+        available names.
+    """
+    if name in _PENALTY_REGISTRY:
+        penalty_cls = _PENALTY_REGISTRY[name]
+        return penalty_cls(**kwargs)
+    available = list(_PENALTY_REGISTRY.keys())
+    raise ValueError(
+        f"Unknown penalty: {name}. Available penalties: {available}"
+    )
+def register_penalty(name: str):
+    """
+    Build a class decorator that stores a penalty class in the registry.
+    The decorated class is returned unchanged, so it remains usable under
+    its own name.  Registering under an existing key replaces the previous
+    entry.
+    Parameters
+    ----------
+    name : str
+        Registry key under which the class becomes available to
+        ``get_penalty``.
+    Returns
+    -------
+    callable
+        Decorator that registers the class it is applied to.
+    Raises
+    ------
+    TypeError
+        Raised by the decorator when the class does not derive from
+        ``Penalty``.
+    Example
+    -------
+    >>> @register_penalty('huber')
+    ... class HuberPenalty(Penalty):
+    ...     ...
+    """
+    def _register(cls):
+        if issubclass(cls, Penalty):
+            _PENALTY_REGISTRY[name] = cls
+            return cls
+        raise TypeError(
+            f"Penalty class must inherit from Penalty, got {cls.__bases__}"
+        )
+    return _register
+def list_penalties() -> list:
+    """Return the registered penalty names (aliases included) as a new list."""
+    return [*_PENALTY_REGISTRY]