PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/backends/_array_ops.py ADDED Viewed

@@ -0,0 +1,529 @@
+"""
+Backend utilities for GLM loss functions.
+Provides wrapper functions that dispatch to numpy/cupy/torch
+based on the input array type, so GLM loss functions can use
+a single code path for all backends.
+"""
+import numpy as np
+from statgpu.backends._base import _resolve_backend
+def _xp(arr):
+    """Return the array namespace (numpy, cupy or torch) that owns *arr*.
+    The choice is made from the module name of ``type(arr)``; anything that
+    does not come from a ``cupy`` or ``torch`` module falls back to numpy.
+    """
+    owner = type(arr).__module__
+    if owner.startswith("torch"):
+        import torch
+        return torch
+    if owner.startswith("cupy"):
+        import cupy
+        return cupy
+    import numpy
+    return numpy
+def _clip(arr, lo, hi):
+    """Bound the values of *arr* to ``[lo, hi]``; either bound may be None.
+    torch tensors go through ``clamp`` (and are returned untouched when both
+    bounds are None); numpy/cupy arrays go through ``clip``.
+    """
+    xp = _xp(arr)
+    if xp.__name__ != "torch":
+        return xp.clip(arr, lo, hi)
+    if lo is None and hi is None:
+        return arr
+    if hi is None:
+        return xp.clamp(arr, min=lo)
+    if lo is None:
+        return xp.clamp(arr, max=hi)
+    return xp.clamp(arr, min=lo, max=hi)
+def _exp(arr):
+    """Apply ``exp`` element-wise using the array module that owns *arr*."""
+    return _xp(arr).exp(arr)
+def _log(arr):
+    """Apply the natural logarithm element-wise using *arr*'s array module."""
+    return _xp(arr).log(arr)
+def _log1p(arr):
+    """Apply ``log(1 + x)`` element-wise using *arr*'s array module."""
+    return _xp(arr).log1p(arr)
+def _sigmoid(arr):
+    """Logistic function ``1 / (1 + exp(-x))`` evaluated on a clipped input.
+    The argument is bounded to +/-88 when the dtype name contains ``32`` and
+    to +/-700 otherwise, keeping ``exp`` below its overflow threshold.
+    """
+    xp = _xp(arr)
+    dtype = getattr(arr, 'dtype', None)
+    # exp() overflows near 89 in float32 and near 709 in float64.
+    if dtype is not None and '32' in str(dtype):
+        bound = 88.0
+    else:
+        bound = 700.0
+    clipped = _clip(arr, -bound, bound)
+    if xp.__name__ != "torch":
+        return 1.0 / (1.0 + xp.exp(-clipped))
+    return xp.sigmoid(clipped)
+def _softplus(x):
+    """Softplus ``log(1 + exp(x))``.
+    torch inputs are delegated to ``torch.nn.functional.softplus``;
+    numpy/cupy inputs use ``log1p(exp(-|x|)) + max(x, 0)``.
+    """
+    xp = _xp(x)
+    if xp.__name__ == "torch":
+        from torch.nn.functional import softplus
+        return softplus(x)
+    tail = xp.log1p(xp.exp(-xp.abs(x)))
+    return tail + _clip(x, 0.0, None)
+def _sum(arr):
+    """Add up every element of *arr* using the array module that owns it."""
+    return _xp(arr).sum(arr)
+def _eigvalsh(arr):
+    """Return ``linalg.eigvalsh(arr)`` from the array module that owns *arr*."""
+    return _xp(arr).linalg.eigvalsh(arr)
+def _zeros_like(arr):
+    """Return ``zeros_like(arr)`` from the array module that owns *arr*."""
+    return _xp(arr).zeros_like(arr)
+def _zeros(n, backend, ref_tensor=None, dtype=None):
+    """Allocate a zero vector of length *n* on the resolved backend.
+    numpy uses *dtype* as given.  cupy and torch fall back to the dtype of
+    *ref_tensor* (float64 when it has none) when *dtype* is not supplied;
+    torch also places the result on ``ref_tensor.device`` (``"cpu"`` when
+    no reference is given).
+    """
+    backend = _resolve_backend(backend, ref_tensor)
+    if backend == "numpy":
+        return np.zeros(n, dtype=dtype)
+    if backend == "cupy":
+        import cupy as cp
+        if dtype is None:
+            dtype = getattr(ref_tensor, "dtype", cp.float64)
+        return cp.zeros(n, dtype=dtype)
+    import torch
+    if ref_tensor is None:
+        device = "cpu"
+        fallback = torch.float64
+    else:
+        device = getattr(ref_tensor, "device", "cpu")
+        fallback = getattr(ref_tensor, "dtype", torch.float64)
+    return torch.zeros(n, device=device, dtype=dtype or fallback)
+def _copy_arr(arr):
+    """Duplicate *arr*: ``.clone()`` when the object has it, else ``.copy()``."""
+    return arr.clone() if hasattr(arr, "clone") else arr.copy()
+def _diag(reg, backend="auto", ref_tensor=None, dtype=None):
+    """Build a diagonal matrix from *reg* on the resolved backend.
+    When *dtype* is not supplied, cupy uses ``reg.dtype`` (float64 if *reg*
+    has none) and torch prefers a floating-point *ref_tensor* dtype, then a
+    floating-point *reg* dtype, then float64.  numpy only converts *reg*
+    when an explicit *dtype* is given.
+    """
+    backend = _resolve_backend(backend, ref_tensor, reg)
+    if backend == "cupy":
+        import cupy as cp
+        if dtype is None:
+            dtype = getattr(reg, "dtype", cp.float64)
+        return cp.diag(cp.asarray(reg, dtype=dtype))
+    if backend == "torch":
+        import torch
+        if ref_tensor is not None:
+            device = ref_tensor.device
+        else:
+            device = getattr(reg, "device", "cpu")
+        if dtype:
+            out_dtype = dtype
+        elif (
+            ref_tensor is not None
+            and getattr(ref_tensor, "is_floating_point", lambda: False)()
+        ):
+            out_dtype = ref_tensor.dtype
+        elif hasattr(reg, "is_floating_point") and reg.is_floating_point():
+            out_dtype = reg.dtype
+        else:
+            out_dtype = torch.float64
+        return torch.diag(torch.as_tensor(reg, dtype=out_dtype, device=device))
+    if dtype is None:
+        return np.diag(reg)
+    return np.diag(np.asarray(reg, dtype=dtype))
+def _to_backend(arr, backend="auto", ref_tensor=None, dtype=None):
+    """Convert *arr* to an array of the resolved backend.
+    Without an explicit *dtype*: cupy reuses the dtype of *ref_tensor* when
+    its name contains ``float`` and otherwise uses float64; torch prefers a
+    floating-point *ref_tensor* dtype, then a floating-point *arr* dtype,
+    then float64; numpy uses ``float``.  torch results are placed on
+    ``ref_tensor.device`` when a reference is given.
+    """
+    backend = _resolve_backend(backend, ref_tensor, arr)
+    if backend == "cupy":
+        import cupy as cp
+        if dtype is None:
+            ref_dtype = getattr(ref_tensor, "dtype", None)
+            use_ref = ref_dtype is not None and 'float' in str(ref_dtype)
+            dtype = ref_dtype if use_ref else cp.float64
+        return cp.asarray(arr, dtype=dtype)
+    if backend == "torch":
+        import torch
+        if ref_tensor is not None:
+            device = ref_tensor.device
+        else:
+            device = getattr(arr, "device", "cpu")
+        if dtype:
+            out_dtype = dtype
+        elif (
+            ref_tensor is not None
+            and getattr(ref_tensor, "is_floating_point", lambda: False)()
+        ):
+            out_dtype = ref_tensor.dtype
+        elif hasattr(arr, "is_floating_point") and arr.is_floating_point():
+            out_dtype = arr.dtype
+        else:
+            out_dtype = torch.float64
+        return torch.as_tensor(arr, dtype=out_dtype, device=device)
+    return np.asarray(arr, dtype=dtype or float)
+def _solve_linear_system(A, b, backend="auto"):
+    """Solve ``A x = b``, falling back to least squares if the solve fails.
+    Parameters
+    ----------
+    A : array
+        Coefficient matrix; also used to resolve ``backend="auto"``.
+    b : array
+        Right-hand side.  For torch a 1-D *b* is temporarily expanded to a
+        column and the solution is squeezed back to 1-D.
+    backend : str
+        Backend name accepted by ``_resolve_backend``.
+    Returns
+    -------
+    array
+        The exact solution, or the least-squares solution when the direct
+        solve raises ``numpy.linalg.LinAlgError`` or ``RuntimeError``.
+    """
+    backend = _resolve_backend(backend, A)
+    try:
+        if backend == "torch":
+            import torch
+            b_col = b.unsqueeze(1) if b.ndim == 1 else b
+            sol = torch.linalg.solve(A, b_col)
+            return sol.squeeze(1) if b.ndim == 1 else sol
+        if backend == "cupy":
+            import cupy as cp
+            return cp.linalg.solve(A, b)
+        return np.linalg.solve(A, b)
+    except (np.linalg.LinAlgError, RuntimeError):
+        # LinAlgError for numpy/cupy singular matrices
+        # RuntimeError for torch singular matrices
+        if backend == "torch":
+            import torch
+            b_col = b.unsqueeze(1) if b.ndim == 1 else b
+            sol = torch.linalg.lstsq(A, b_col).solution
+            return sol.squeeze(1) if b.ndim == 1 else sol
+        if backend == "cupy":
+            import cupy as cp
+            # Pass rcond explicitly, as the numpy branch does, so both
+            # fallbacks use the same singular-value cutoff and cupy's
+            # legacy-default path is avoided.
+            return cp.linalg.lstsq(A, b, rcond=None)[0]
+        return np.linalg.lstsq(A, b, rcond=None)[0]
+def _eye_like(n, ref):
+    """Build an ``n x n`` identity matrix on the backend that owns *ref*.
+    cupy and torch reuse ``ref.dtype`` (torch also ``ref.device``); numpy
+    uses ``ref.dtype`` when present and float64 otherwise.
+    """
+    backend = _resolve_backend("auto", ref)
+    if backend == "torch":
+        import torch
+        return torch.eye(n, dtype=ref.dtype, device=ref.device)
+    if backend == "cupy":
+        import cupy as cp
+        return cp.eye(n, dtype=ref.dtype)
+    return np.eye(n, dtype=getattr(ref, "dtype", np.float64))
+def _sync_scalars(*dev_vals, backend):
+    """Batch device scalars into Python numbers with one host transfer.
+    Parameters
+    ----------
+    *dev_vals : scalars or single-element arrays/tensors
+        Values to bring to the host.
+    backend : str
+        Backend name accepted by ``_resolve_backend`` (keyword-only).
+    Returns
+    -------
+    tuple
+        One Python number per input, in input order.
+    """
+    backend = _resolve_backend(backend, *dev_vals)
+    if backend == "numpy":
+        return tuple(float(v) for v in dev_vals)
+    if backend == "torch":
+        import torch
+        # The first torch value supplies the device/dtype used for stacking.
+        ref = next(
+            (
+                v
+                for v in dev_vals
+                if type(v).__module__.startswith("torch")
+            ),
+            None,
+        )
+        device = getattr(ref, "device", None)
+        dtype = getattr(ref, "dtype", torch.float64)
+        stacked = torch.stack(
+            [torch.as_tensor(v, device=device, dtype=dtype) for v in dev_vals]
+        )
+        # tolist() moves the whole stack to the host at once, instead of
+        # issuing one blocking .item() read per element.
+        return tuple(stacked.reshape(-1).tolist())
+    import cupy as cp
+    stacked = cp.stack([cp.asarray(v) for v in dev_vals])
+    # A single asnumpy() copy replaces one float(stacked[i]) read per value.
+    return tuple(float(v) for v in cp.asnumpy(stacked.ravel()))
+def _abs_sum(x):
+    """Return ``sum(|x|)`` as a Python float."""
+    xp = _xp(x)
+    total = xp.sum(xp.abs(x))
+    if xp.__name__ == "torch":
+        total = total.item()
+    return float(total)
+def _abs_max(x):
+    """Return ``max(|x|)`` as a Python float."""
+    xp = _xp(x)
+    peak = xp.max(xp.abs(x))
+    if xp.__name__ == "torch":
+        peak = peak.item()
+    return float(peak)
+def _norm2(x):
+    """Return ``linalg.norm(x)`` as a Python float."""
+    xp = _xp(x)
+    length = xp.linalg.norm(x)
+    if xp.__name__ == "torch":
+        length = length.item()
+    return float(length)
+def _dot(a, b):
+    """Return ``a.dot(b)`` as a Python float.
+    The product is unwrapped with ``.item()`` when it offers that method.
+    """
+    product = a.dot(b)
+    if hasattr(product, "item"):
+        product = product.item()
+    return float(product)
+def _dot_dev(a, b):
+    """Return ``a.dot(b)``, converted to a Python float only for numpy *a*.
+    For any other *a* the product is handed back exactly as ``dot``
+    produced it, without an explicit host conversion.
+    """
+    product = a.dot(b)
+    return float(product) if isinstance(a, np.ndarray) else product
+def _sum_sq(x):
+    """Return ``sum(x ** 2)`` as a Python float."""
+    total = _xp(x).sum(x ** 2)
+    if hasattr(total, "item"):
+        total = total.item()
+    return float(total)
+def _sum_sq_dev(x):
+    """Return ``sum(x ** 2)``; a Python float for numpy, else the raw result."""
+    xp = _xp(x)
+    total = xp.sum(x ** 2)
+    return float(total) if xp.__name__ == "numpy" else total
+def _norm2_dev(x):
+    """Return ``linalg.norm(x)``; a Python float for numpy, else the raw result."""
+    xp = _xp(x)
+    length = xp.linalg.norm(x)
+    return float(length) if xp.__name__ == "numpy" else length
+def _abs_sum_dev(x):
+    """Return ``sum(|x|)``; a Python float for numpy, else the raw result."""
+    xp = _xp(x)
+    total = xp.sum(xp.abs(x))
+    return float(total) if xp.__name__ == "numpy" else total
+def _device_leq(a, b):
+    """Evaluate ``a <= b`` and return the outcome as a Python bool.
+    The backend is resolved from *a* and *b*.  torch results are unwrapped
+    with ``.item()``; every other result is coerced with ``bool()`` so the
+    numpy path no longer leaks ``numpy.bool_`` to callers.
+    """
+    backend = _resolve_backend("auto", a, b)
+    if backend == "torch":
+        return bool((a <= b).item())
+    # cupy and numpy comparisons both convert cleanly through bool().
+    return bool(a <= b)
+def _device_gt(a, b):
+    """Evaluate ``a > b`` and return the outcome as a Python bool.
+    The backend is resolved from *a* and *b*.  torch results are unwrapped
+    with ``.item()``; every other result is coerced with ``bool()`` so the
+    numpy path no longer leaks ``numpy.bool_`` to callers.
+    """
+    backend = _resolve_backend("auto", a, b)
+    if backend == "torch":
+        return bool((a > b).item())
+    # cupy and numpy comparisons both convert cleanly through bool().
+    return bool(a > b)
+def _clip_grad_on_device(grad, coef_old, backend):
+    """Rescale *grad* so its L2 norm does not exceed a coefficient-based cap.
+    The cap is ``max(sum(|coef_old|) * _GRAD_CLIP_COEF_FACTOR +
+    _GRAD_CLIP_ABS_FLOOR, _GRAD_CLIP_MAX)``.  ``backend == "numpy"`` and
+    ``backend == "torch"`` have dedicated branches; any other value is
+    handled with cupy.  The torch and cupy branches pick the scale with
+    ``where`` rather than a Python ``if``.
+    """
+    # Imported lazily to avoid a circular dependency (backends <-> solvers).
+    from statgpu.solvers._constants import (
+        _GRAD_CLIP_COEF_FACTOR,
+        _GRAD_CLIP_ABS_FLOOR,
+        _GRAD_CLIP_MAX,
+    )
+    if backend == "numpy":
+        grad_norm = float(np.linalg.norm(grad))
+        coef_l1 = float(np.sum(np.abs(coef_old)))
+        limit = max(coef_l1 * _GRAD_CLIP_COEF_FACTOR + _GRAD_CLIP_ABS_FLOOR, _GRAD_CLIP_MAX)
+        if grad_norm > limit:
+            return grad * (limit / grad_norm)
+        return grad
+    if backend == "torch":
+        import torch
+        sq_norm = torch.sum(grad ** 2)
+        coef_l1 = torch.sum(torch.abs(coef_old))
+        limit = torch.clamp(
+            coef_l1 * _GRAD_CLIP_COEF_FACTOR + _GRAD_CLIP_ABS_FLOOR,
+            min=_GRAD_CLIP_MAX,
+        )
+        unit = torch.ones(1, device=grad.device, dtype=grad.dtype)
+        # The 1e-30 term keeps the division finite for a zero gradient.
+        shrink = limit / torch.sqrt(sq_norm + 1e-30)
+        factor = torch.where(sq_norm > limit * limit, shrink, unit)
+        return grad * factor
+    import cupy as cp
+    sq_norm = cp.sum(grad ** 2)
+    coef_l1 = cp.sum(cp.abs(coef_old))
+    limit = cp.maximum(coef_l1 * _GRAD_CLIP_COEF_FACTOR + _GRAD_CLIP_ABS_FLOOR, _GRAD_CLIP_MAX)
+    unit = cp.ones(1, dtype=grad.dtype)
+    shrink = limit / cp.sqrt(sq_norm + 1e-30)
+    factor = cp.where(sq_norm > limit * limit, shrink, unit)
+    return grad * factor
+def _max_eigval_power(mat, n_iter=20, tol=1e-8):
+    """Largest eigenvalue of a symmetric matrix via power iteration.
+    Much faster than full eigendecomposition, especially on GPU
+    where cuSOLVER eigvalsh has high kernel compilation overhead.
+    O(p^2) vs O(p^3). Accuracy within 1% for 20 iterations.
+    Parameters
+    ----------
+    mat : 2-d array (p, p), symmetric positive semi-definite.
+    n_iter : int
+        Max power iterations.
+    tol : float
+        Early stopping tolerance on eigenvalue change.
+    Returns
+    -------
+    float : max eigenvalue estimate.
+    Notes
+    -----
+    Returns the fallback value ``1.0`` when the seed vector norm is below
+    ``1e-15`` or when the squared norm of ``mat @ v`` drops below ``1e-30``.
+    Returns ``0.0`` when ``n_iter`` is 0 because no iteration runs.
+    The numpy loop and the cupy/torch loop use different early-stop guards
+    (``lambda_old > 0`` versus ``i > 0``).
+    NOTE(review): the speed and accuracy figures above are not established
+    by anything in this function - confirm against benchmarks.
+    """
+    xp = _xp(mat)
+    p = mat.shape[0]
+    dtype = getattr(mat, 'dtype', None)
+    # Build a deterministic but non-constant seed vector to avoid the
+    # pathological case where an all-ones vector is orthogonal to the top
+    # eigenspace (e.g., [[1,-1],[-1,1]]).
+    if xp.__name__ == "torch":
+        v = xp.arange(1, p + 1, dtype=dtype, device=mat.device)
+    elif dtype is not None:
+        v = xp.arange(1, p + 1, dtype=dtype)
+    else:
+        v = xp.arange(1, p + 1, dtype=xp.float64)
+    v_norm = xp.sqrt(xp.dot(v, v))
+    v_norm_val = float(v_norm)
+    # An (effectively) empty seed cannot be normalised; use the fallback.
+    if v_norm_val < 1e-15:
+        return 1.0
+    v = v / v_norm
+    # numpy path: every quantity is already on the host.
+    if xp.__name__ == "numpy":
+        lambda_old = 0.0
+        lambda_new = 0.0
+        for _ in range(n_iter):
+            v_new = mat @ v
+            # Cache dot(v_new, v_new) to avoid recomputing mat @ v.
+            nv2 = xp.dot(v_new, v_new)
+            v_norm_sq = float(nv2)
+            if v_norm_sq < 1e-30:
+                return 1.0
+            v_norm = v_norm_sq ** 0.5
+            v = v_new / v_norm
+            # lambda = v^T A v = v^T v_new (v_new = A v, already computed)
+            lambda_new = float(xp.dot(v, v_new))
+            # Relative-change test; skipped while the previous estimate is
+            # not strictly positive.
+            if lambda_old > 0 and abs(lambda_new - lambda_old) < tol * abs(lambda_new):
+                break
+            lambda_old = lambda_new
+        return lambda_new
+    # cupy/torch path: scalars are converted to Python floats each iteration.
+    lambda_old = 0.0
+    lambda_val = 0.0
+    for i in range(n_iter):
+        v_new = mat @ v
+        dot_vn_vn = xp.dot(v_new, v_new)
+        v_norm_sq = float(dot_vn_vn.item() if hasattr(dot_vn_vn, "item") else dot_vn_vn)
+        if v_norm_sq < 1e-30:
+            return 1.0  # Zero matrix — same fallback as numpy path
+        v_norm = v_norm_sq ** 0.5
+        v = v_new / v_norm
+        lambda_new = xp.dot(v, v_new)
+        lambda_val = float(lambda_new.item() if hasattr(lambda_new, "item") else lambda_new)
+        # Relative-change test; skipped only on the first iteration.
+        if i > 0 and abs(lambda_val - lambda_old) < tol * abs(lambda_val):
+            return lambda_val
+        lambda_old = lambda_val
+    return lambda_val
+def _soft_threshold(w, thresh):
+    """Apply soft-thresholding: ``sign(w) * max(|w| - thresh, 0)``.
+    The array module is taken from *w*, so numpy, cupy and torch inputs
+    share this code path.  ``thresh`` is used directly in the comparison
+    and subtraction, so it may be a scalar or anything that broadcasts
+    against ``|w|``.
+    """
+    xp = _xp(w)
+    magnitude = xp.abs(w)
+    shrunk = xp.where(magnitude > thresh, magnitude - thresh, 0.0)
+    # Adding 0.0 turns any -0.0 produced by the sign product into +0.0.
+    return (shrunk * xp.sign(w)) + 0.0
+def _scalar_tensor(val, ref_arr):
+    """Wrap *val* as a scalar usable alongside *ref_arr*.
+    For a torch *ref_arr* this is a tensor built from *val* with the same
+    dtype and device; for anything else it is ``float(val)``.
+    """
+    if _xp(ref_arr).__name__ != "torch":
+        return float(val)
+    import torch
+    return torch.tensor(val, dtype=ref_arr.dtype, device=ref_arr.device)
+def _xp_copy(arr):
+    """Duplicate *arr*: ``.clone()`` for torch tensors, ``.copy()`` otherwise."""
+    return arr.clone() if _xp(arr).__name__ == "torch" else arr.copy()
+def _xp_zeros(shape, dtype, ref_arr):
+    """Allocate zeros of *shape* on the backend that owns *ref_arr*.
+    A falsy *dtype* falls back to ``ref_arr.dtype``; torch results are also
+    placed on ``ref_arr.device``.
+    """
+    xp = _xp(ref_arr)
+    if xp.__name__ != "torch":
+        return xp.zeros(shape, dtype=dtype or getattr(ref_arr, 'dtype', None))
+    import torch
+    return torch.zeros(shape, dtype=dtype or ref_arr.dtype, device=ref_arr.device)
+def _xp_asarray(arr, dtype, ref_arr):
+    """Convert *arr* to the backend (and, for torch, device) of *ref_arr*.
+    torch: existing tensors are moved with ``.to``; other inputs are first
+    converted to a float64 numpy array and then wrapped with *dtype*.
+    cupy: a torch *dtype* is translated through ``_torch_dtype_to_np``
+    before calling ``asarray``.  numpy: plain ``np.asarray``.
+    """
+    xp = _xp(ref_arr)
+    owner = xp.__name__
+    if owner == "torch":
+        import torch
+        if not isinstance(arr, torch.Tensor):
+            host = np.asarray(arr, dtype=np.float64)
+            return torch.as_tensor(host, dtype=dtype, device=ref_arr.device)
+        return arr.to(dtype=dtype, device=ref_arr.device)
+    if owner != "cupy":
+        return np.asarray(arr, dtype=dtype)
+    # cupy cannot take torch dtypes, so translate them to numpy first.
+    if hasattr(dtype, '__module__') and 'torch' in str(getattr(dtype, '__module__', '')):
+        from statgpu.backends._utils import _torch_dtype_to_np
+        dtype = _torch_dtype_to_np(dtype)
+    return xp.asarray(arr, dtype=dtype)
+def _xp_eye(n, dtype, ref_arr):
+    """Build an ``n x n`` identity matrix on the backend that owns *ref_arr*.
+    A falsy *dtype* falls back to ``ref_arr.dtype``; torch results are also
+    placed on ``ref_arr.device``.
+    """
+    xp = _xp(ref_arr)
+    if xp.__name__ != "torch":
+        return xp.eye(n, dtype=dtype or getattr(ref_arr, 'dtype', None))
+    import torch
+    return torch.eye(n, dtype=dtype or ref_arr.dtype, device=ref_arr.device)

statgpu/backends/_base.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""
+Abstract base class for compute backends.
+A backend wraps an array library (NumPy, CuPy, or PyTorch) and exposes a
+uniform interface so that model implementations can stay array-library agnostic.
+"""
+from abc import ABC, abstractmethod
+from typing import Any, Optional
+import numpy as np
+# ---------------------------------------------------------------------------
+# Array-type detection helpers (deferred imports to avoid hard deps)
+# ---------------------------------------------------------------------------
+def _is_cupy_array(x: Any) -> bool:
+    """Return True if *x* is a CuPy ndarray.
+    CuPy is looked up in ``sys.modules`` instead of being imported: an
+    object can only be a CuPy array if CuPy has already been loaded, so
+    this avoids retrying a failing import (or triggering a heavy first
+    import) on every call.
+    """
+    import sys
+    cupy_mod = sys.modules.get("cupy")
+    # getattr guards against an absent or partially initialised module.
+    ndarray_type = getattr(cupy_mod, "ndarray", None)
+    return ndarray_type is not None and isinstance(x, ndarray_type)
+def _is_torch_array(x: Any) -> bool:
+    """Return True if *x* is a PyTorch Tensor.
+    PyTorch is looked up in ``sys.modules`` instead of being imported: an
+    object can only be a Tensor if PyTorch has already been loaded, so
+    this avoids retrying a failing import (or triggering a heavy first
+    import) on every call.
+    """
+    import sys
+    torch_mod = sys.modules.get("torch")
+    # getattr guards against an absent or partially initialised module.
+    tensor_type = getattr(torch_mod, "Tensor", None)
+    return tensor_type is not None and isinstance(x, tensor_type)
+def _resolve_backend(backend: str, *arrays) -> str:
+    """Map *backend* to one of ``'numpy'``, ``'cupy'`` or ``'torch'``.
+    The name is stringified, stripped and lower-cased, and the legacy
+    aliases ``'cpu'`` -> ``'numpy'`` and ``'cuda'``/``'gpu'`` -> ``'cupy'``
+    are applied.  For ``'auto'`` the *arrays* are scanned in order,
+    skipping ``None``, and the first torch or cupy array decides the
+    result; ``'numpy'`` is returned when none is found.
+    Raises ``ValueError`` for any other name.
+    """
+    aliases = {"cpu": "numpy", "cuda": "cupy", "gpu": "cupy"}
+    key = str(backend).strip().lower()
+    key = aliases.get(key, key)
+    if key in ("numpy", "cupy", "torch"):
+        return key
+    if key != "auto":
+        raise ValueError(
+            "backend must be one of: 'auto', 'numpy', 'cupy', 'torch', "
+            "or legacy aliases 'cpu', 'cuda', 'gpu'"
+        )
+    for candidate in arrays:
+        if candidate is None:
+            continue
+        if _is_torch_array(candidate):
+            return "torch"
+        if _is_cupy_array(candidate):
+            return "cupy"
+    return "numpy"
+class BackendBase(ABC):
+    """
+    Abstract base for compute backends.
+    Subclasses wrap a specific array library and expose:
+    * ``xp``        – the underlying array module (numpy / cupy / torch).
+    * ``asarray``   – convert arbitrary inputs to the backend's native array.
+    * ``to_numpy``  – convert the backend's arrays back to ``numpy.ndarray``.
+    * ``is_available`` – runtime check for the library being usable.
+    The convenience helpers below are written against NumPy-style names on
+    ``xp`` (``xp.linalg.solve``, ``xp.concatenate``, ``arr.astype`` ...);
+    a subclass whose library spells these differently needs to override
+    the affected helpers.
+    """
+    #: Short name used in repr and config ('numpy', 'cupy', 'torch').
+    name: str = ""
+    # ------------------------------------------------------------------
+    # Abstract interface
+    # ------------------------------------------------------------------
+    @property
+    @abstractmethod
+    def xp(self) -> Any:
+        """Return the array module (numpy / cupy / torch)."""
+    @abstractmethod
+    def asarray(self, x, dtype=None) -> Any:
+        """
+        Convert *x* to this backend's native array type.
+        Parameters
+        ----------
+        x : array-like, numpy.ndarray, cupy.ndarray, or torch.Tensor
+            Input data.
+        dtype : dtype-like, optional
+            Desired data type.
+        Returns
+        -------
+        array
+            Native array on the backend's device.
+        """
+    @abstractmethod
+    def to_numpy(self, x) -> np.ndarray:
+        """
+        Convert *x* to a ``numpy.ndarray``.
+        Parameters
+        ----------
+        x : array-like
+            A native array produced by this backend (or any array-like).
+        Returns
+        -------
+        numpy.ndarray
+        """
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Return True if this backend can be used in the current environment."""
+    # ------------------------------------------------------------------
+    # Convenience helpers (non-abstract, built on top of xp)
+    # ------------------------------------------------------------------
+    def solve(self, A, b):
+        """Solve the linear system *Ax = b* with ``xp.linalg.solve``."""
+        return self.xp.linalg.solve(A, b)
+    def lstsq(self, A, b, rcond=None):
+        """Return ``xp.linalg.lstsq(A, b, rcond=rcond)`` for *Ax ≈ b*."""
+        return self.xp.linalg.lstsq(A, b, rcond=rcond)
+    def astype(self, arr, dtype):
+        """Cast *arr* to *dtype* (backend-agnostic .astype / .to)."""
+        return arr.astype(dtype)
+    def concatenate(self, arrays, axis=0):
+        """Concatenate *arrays* along *axis* (.concatenate / .cat)."""
+        return self.xp.concatenate(arrays, axis=axis)
+    def take_along_axis(self, arr, indices, axis):
+        """Gather elements along *axis* (.take_along_axis / .take_along_dim)."""
+        return self.xp.take_along_axis(arr, indices, axis=axis)
+    def cummin(self, arr, axis=0):
+        """Cumulative minimum along *axis*."""
+        return self.xp.minimum.accumulate(arr, axis=axis)
+    def cummax(self, arr, axis=0):
+        """Cumulative maximum along *axis*."""
+        return self.xp.maximum.accumulate(arr, axis=axis)
+    def flip(self, arr, axis=0):
+        """Reverse the order of elements along *axis*."""
+        return self.xp.flip(arr, axis=axis)
+    def copy(self, arr):
+        """Return a copy of *arr*."""
+        return arr.copy()
+    def reshape(self, arr, shape):
+        """Reshape *arr* to *shape*."""
+        return arr.reshape(shape)
+    def logsumexp(self, arr, axis=None):
+        """Log-sum-exp along *axis*: ``max + log(sum(exp(arr - max)))``.
+        A non-finite maximum is replaced by zero before shifting, so an
+        all ``-inf`` slice yields ``-inf`` and a slice containing ``+inf``
+        yields ``+inf`` instead of ``nan`` from evaluating ``inf - inf``.
+        """
+        xp = self.xp
+        m = xp.max(arr, axis=axis, keepdims=True)
+        # Shifting by a non-finite max would evaluate inf - inf = nan.
+        m = xp.where(xp.isfinite(m), m, xp.zeros_like(m))
+        return xp.squeeze(m, axis=axis) + xp.log(xp.sum(xp.exp(arr - m), axis=axis))
+    def __repr__(self) -> str:
+        available = "available" if self.is_available() else "unavailable"
+        return f"{self.__class__.__name__}(name={self.name!r}, {available})"