PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/backends/_gpu_inference_cupy.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""
+GPU utility functions for full GPU computation.
+All statistical computations on GPU.
+"""
+import numpy as np
+from statgpu.inference._distributions_backend import (
+    norm,
+    t,
+    regularized_betainc_gpu,
+)
+def t_two_tail_pvalues_gpu(t_abs, df_resid):
+    """
+    Return two-sided Student-t p-values (legacy-name wrapper).
+    Parameters
+    ----------
+    t_abs : array-like
+        t-statistics, forwarded unchanged to ``t.two_sided_pvalue``.
+    df_resid : int or float
+        Residual degrees of freedom, forwarded as ``df``.
+    Returns
+    -------
+    The value produced by ``t.two_sided_pvalue`` for these inputs.
+    """
+    pvalues = t.two_sided_pvalue(t_abs, df=df_resid)
+    return pvalues
+def t_crit_gpu_two_tail(alpha, df_resid, *, max_bisect_steps: int = 60):
+    """
+    Return the two-sided Student-t critical value (legacy-name wrapper).
+    Parameters
+    ----------
+    alpha : float
+        Two-tailed significance level, forwarded unchanged.
+    df_resid : int or float
+        Residual degrees of freedom, forwarded as ``df``.
+    max_bisect_steps : int, default=60
+        Keyword-only; forwarded unchanged to ``t.two_sided_critical_value``.
+    Returns
+    -------
+    The value produced by ``t.two_sided_critical_value``.
+    """
+    options = {"df": df_resid, "max_bisect_steps": max_bisect_steps}
+    return t.two_sided_critical_value(alpha, **options)
+def norm_two_tail_pvalues_gpu(z_abs):
+    """
+    Return two-sided normal p-values (legacy-name wrapper).
+    Parameters
+    ----------
+    z_abs : array-like
+        z-statistics, forwarded unchanged to ``norm.two_sided_pvalue``.
+    Returns
+    -------
+    The value produced by ``norm.two_sided_pvalue``.
+    """
+    pvalues = norm.two_sided_pvalue(z_abs)
+    return pvalues
+def norm_crit_gpu_two_tail(alpha):
+    """
+    Return the two-sided normal critical value (legacy-name wrapper).
+    Parameters
+    ----------
+    alpha : float
+        Two-tailed significance level, forwarded unchanged.
+    Returns
+    -------
+    The value produced by ``norm.two_sided_critical_value``.
+    """
+    critical_value = norm.two_sided_critical_value(alpha)
+    return critical_value
+def compute_inference_gpu(X_design, resid, scale, df_resid, params_gpu):
+    """
+    Compute standard errors, t-values, p-values and 95% confidence intervals on GPU.
+    Parameters
+    ----------
+    X_design : cupy.ndarray
+        Design matrix on GPU.
+    resid : cupy.ndarray
+        Residuals on GPU. Kept for interface compatibility; this function
+        does not read it.
+    scale : float or cupy.ndarray
+        Error variance estimate.
+    df_resid : int
+        Degrees of freedom.
+    params_gpu : cupy.ndarray
+        Parameters on GPU.
+    Returns
+    -------
+    bse_gpu : cupy.ndarray
+        Standard errors on GPU.
+    tvalues_gpu : cupy.ndarray
+        t-statistics on GPU.
+    pvalues_gpu : cupy.ndarray
+        p-values on GPU.
+    conf_int_gpu : cupy.ndarray
+        95% confidence intervals on GPU, stacked as (lower, upper) along axis 1.
+    """
+    import cupy as cp
+    # Compute (X'X)^-1 on GPU
+    XtX = cp.matmul(X_design.T, X_design)
+    try:
+        # The factor itself is not needed: the call only acts as a
+        # positive-definiteness probe when CuPy is configured to raise
+        # on linalg failures.
+        cp.linalg.cholesky(XtX)
+        XtX_inv = cp.linalg.inv(XtX)
+    except Exception:
+        # Fallback to pseudo-inverse
+        XtX_inv = cp.linalg.pinv(XtX)
+    else:
+        # By default CuPy's linalg routines do not check for failure, so a
+        # singular X'X can come back as inf/NaN instead of raising. Detect
+        # that case explicitly and use the pseudo-inverse instead.
+        if not bool(cp.isfinite(XtX_inv).all()):
+            XtX_inv = cp.linalg.pinv(XtX)
+    # Standard errors: sqrt(scale * diag((X'X)^-1)), clamped at zero so
+    # round-off cannot produce a negative variance.
+    bse_gpu = cp.sqrt(cp.maximum(scale * cp.diag(XtX_inv), 0.0))
+    # t-statistics (add epsilon to avoid division by zero for collinear features)
+    tvalues_gpu = params_gpu / (bse_gpu + 1e-30)
+    # p-values (two-tailed t-test), entirely on GPU.
+    pvalues_gpu = t.two_sided_pvalue(tvalues_gpu, df=df_resid)
+    # Confidence intervals (95%)
+    alpha = 0.05  # two-tailed significance level for 95% CI
+    t_crit_gpu = cp.asarray(
+        t.two_sided_critical_value(alpha, df=df_resid),
+        dtype=bse_gpu.dtype,
+    )
+    margin = t_crit_gpu * bse_gpu
+    conf_int_gpu = cp.stack([params_gpu - margin, params_gpu + margin], axis=1)
+    return bse_gpu, tvalues_gpu, pvalues_gpu, conf_int_gpu
+def compute_r2_gpu(y, resid):
+    """
+    Compute the coefficient of determination (R-squared) on GPU.
+    Evaluates ``1 - SS_res / SS_tot``. There is no guard for
+    ``SS_tot == 0`` (constant ``y``).
+    Parameters
+    ----------
+    y : cupy.ndarray
+        True values on GPU.
+    resid : cupy.ndarray
+        Residuals on GPU.
+    Returns
+    -------
+    r2 : float
+        R-squared value, copied back to the host.
+    """
+    import cupy as cp
+    centered = y - y.mean()
+    ss_tot = cp.sum(centered ** 2)
+    ss_res = cp.sum(resid ** 2)
+    ratio = ss_res / ss_tot
+    return float(cp.asnumpy(1 - ratio))
+def compute_aic_bic_gpu(n, k, scale):
+    """
+    Compute AIC and BIC on GPU from the error variance.
+    Uses ``AIC = n * log(scale) + 2 * k`` and
+    ``BIC = n * log(scale) + k * log(n)``.
+    Parameters
+    ----------
+    n : int
+        Number of observations.
+    k : int
+        Number of parameters.
+    scale : float or cupy.ndarray
+        Error variance (MLE estimate: RSS/n).
+    Returns
+    -------
+    aic : float
+        AIC value.
+    bic : float
+        BIC value.
+    """
+    import cupy as cp
+    # Objects without a ``.get`` method are treated as host values and
+    # copied to the GPU first.
+    scale_gpu = scale if hasattr(scale, 'get') else cp.array(scale)
+    n_gpu = cp.array(float(n))
+    k_gpu = cp.array(float(k))
+    fit_term = n_gpu * cp.log(scale_gpu)
+    aic_gpu = fit_term + 2 * k_gpu
+    bic_gpu = fit_term + k_gpu * cp.log(n_gpu)
+    return float(cp.asnumpy(aic_gpu)), float(cp.asnumpy(bic_gpu))
+def compute_f_stat_gpu(y, resid, X_design, df_resid):
+    """
+    Compute the overall F-statistic and its p-value on GPU.
+    Parameters
+    ----------
+    y : cupy.ndarray
+        True values on GPU.
+    resid : cupy.ndarray
+        Residuals on GPU.
+    X_design : cupy.ndarray
+        Design matrix on GPU. One column is subtracted as the intercept
+        when counting regressors.
+    df_resid : int
+        Residual degrees of freedom.
+    Returns
+    -------
+    fvalue : float
+        F-statistic (``inf`` in the degenerate case below).
+    pvalue : float
+        p-value for the F-statistic. ``(inf, 1.0)`` is returned when there
+        are no non-intercept regressors or the residual sum of squares is
+        not positive.
+    """
+    import cupy as cp
+    y_mean = y.mean()
+    ss_tot = cp.sum((y - y_mean) ** 2)
+    ss_res = cp.sum(resid ** 2)
+    ss_reg = ss_tot - ss_res
+    k = X_design.shape[1] - 1  # exclude intercept
+    if k == 0 or ss_res <= 0:
+        # Always return a (fvalue, pvalue) pair so callers can unpack the
+        # result on every path; same sentinel pair as compute_f_stat_torch.
+        return np.inf, 1.0
+    fvalue_gpu = (ss_reg / k) / (ss_res / df_resid)
+    fvalue = float(cp.asnumpy(fvalue_gpu))
+    # p-value on GPU using F CDF expressed via regularized incomplete beta.
+    #
+    # For F ~ F(d1, d2):
+    #   CDF(x) = I_{ d1 x / (d1 x + d2) }(d1/2, d2/2)
+    #   pvalue = 1 - CDF
+    d1 = float(k)
+    d2 = float(df_resid)
+    if d2 <= 0 or d1 <= 0:
+        pvalue = 1.0
+    else:
+        z = (d1 * fvalue) / (d1 * fvalue + d2)
+        cdf = regularized_betainc_gpu(d1 / 2.0, d2 / 2.0, cp.asarray(z))
+        pvalue = float(1.0 - cp.asnumpy(cdf))
+    return fvalue, pvalue

statgpu/backends/_gpu_inference_torch.py ADDED Viewed

@@ -0,0 +1,422 @@
+"""
+Torch-specific GPU utility functions for full GPU computation.
+This module mirrors _gpu_inference_cupy.py but uses PyTorch operations instead of CuPy.
+All statistical computations run on GPU via Torch.
+"""
+import numpy as np
+from statgpu.backends import _get_torch_device_str as _get_torch_device
+def _import_torch():
+    """
+    Import ``torch`` on first use and return the module.
+    Raises
+    ------
+    RuntimeError
+        If ``torch`` cannot be imported; the original ImportError is chained.
+    """
+    try:
+        import torch as torch_module
+    except ImportError as exc:
+        raise RuntimeError("PyTorch (torch) is required for Torch backend") from exc
+    return torch_module
+def t_two_tail_pvalues_torch(t_abs, df_resid, device=None):
+    """
+    Return two-sided Student-t p-values via the Torch backend (legacy-name wrapper).
+    Parameters
+    ----------
+    t_abs : torch.Tensor or array-like
+        t-statistics, forwarded unchanged.
+    df_resid : int or float
+        Residual degrees of freedom, forwarded as ``df``.
+    device : str, optional
+        Torch device string handed to ``get_distribution``.
+    Returns
+    -------
+    torch.Tensor
+        Two-sided p-values.
+    """
+    from statgpu.inference._distributions_backend import get_distribution
+    student_t = get_distribution("t", backend="torch", device=device)
+    pvalues = student_t.two_sided_pvalue(t_abs, df=df_resid)
+    return pvalues
+def t_crit_torch_two_tail_torch(alpha, df_resid, *, max_bisect_steps=60, device=None):
+    """
+    Return the two-sided Student-t critical value via the Torch backend (legacy-name wrapper).
+    Parameters
+    ----------
+    alpha : float
+        Significance level (e.g., 0.05 for 95% CI).
+    df_resid : int or float
+        Residual degrees of freedom, forwarded as ``df``.
+    max_bisect_steps : int, default=60
+        Keyword-only; forwarded unchanged to the distribution object.
+    device : str, optional
+        Torch device string handed to ``get_distribution``.
+    Returns
+    -------
+    torch.Tensor
+        Critical t-value.
+    """
+    from statgpu.inference._distributions_backend import get_distribution
+    student_t = get_distribution("t", backend="torch", device=device)
+    options = {"df": df_resid, "max_bisect_steps": max_bisect_steps}
+    return student_t.two_sided_critical_value(alpha, **options)
+def norm_two_tail_pvalues_torch(z_abs, device=None):
+    """
+    Return two-sided normal p-values via the Torch backend (legacy-name wrapper).
+    Parameters
+    ----------
+    z_abs : torch.Tensor or array-like
+        z-statistics, forwarded unchanged.
+    device : str, optional
+        Torch device string, forwarded unchanged.
+    Returns
+    -------
+    torch.Tensor
+        Two-sided p-values.
+    """
+    from statgpu.inference._distributions_backend import norm
+    torch_options = {"backend": "torch", "device": device}
+    return norm.two_sided_pvalue(z_abs, **torch_options)
+def norm_crit_torch_two_tail_torch(alpha, device=None):
+    """
+    Backward-compatible alias for two-sided normal critical value on Torch GPU.
+    Parameters
+    ----------
+    alpha : float
+        Significance level (e.g., 0.05 for 95% CI).
+    device : str, optional
+        Torch device string. When given, a tensor result is moved to this
+        device before being returned.
+    Returns
+    -------
+    torch.Tensor
+        Critical z-value.
+    """
+    from statgpu.inference._distributions_backend import norm
+    critical_value = norm.two_sided_critical_value(alpha, backend="torch")
+    # Honour ``device`` without relying on the callee accepting a ``device``
+    # keyword: relocate the result afterwards if it is tensor-like.
+    if device is not None and hasattr(critical_value, "to"):
+        critical_value = critical_value.to(device)
+    return critical_value
+def compute_inference_torch(X_design, resid, scale, df_resid, params_torch, cov_type="nonrobust", device=None):
+    """
+    Compute standard errors, t-values, p-values, and confidence intervals on Torch GPU.
+    Parameters
+    ----------
+    X_design : torch.Tensor
+        Design matrix on GPU.
+    resid : torch.Tensor
+        Residuals on GPU. Only read for the robust covariance types.
+    scale : float or torch.Tensor
+        Error variance estimate (sigma^2). Only read for the classical
+        (non-sandwich) covariance.
+    df_resid : int
+        Degrees of freedom.
+    params_torch : torch.Tensor
+        Parameters on GPU.
+    cov_type : str, default='nonrobust'
+        Covariance type. 'hc0', 'hc1', 'hc2' and 'hc3' use the sandwich
+        estimator ``(X'X)^-1 X' diag(w) X (X'X)^-1`` with the corresponding
+        squared-residual weights ``w``. Any other value (including
+        'nonrobust' and 'hac') uses ``scale * (X'X)^-1``; HAC is not
+        implemented in this function.
+    device : str, optional
+        Torch device string.
+    Returns
+    -------
+    bse_torch : torch.Tensor
+        Standard errors on GPU.
+    tvalues_torch : torch.Tensor
+        t-statistics on GPU.
+    pvalues_torch : torch.Tensor
+        p-values on GPU.
+    conf_int_torch : torch.Tensor
+        95% confidence intervals on GPU, stacked as (lower, upper) along dim 1.
+    """
+    torch = _import_torch()
+    if device is None:
+        device = _get_torch_device()
+    from statgpu.inference._distributions_backend import get_distribution
+    t_dist = get_distribution("t", backend="torch", device=device)
+    # Compute (X'X)^-1 on GPU
+    XtX = torch.matmul(X_design.T, X_design)
+    try:
+        # Cholesky-based inverse (more stable for positive definite X'X)
+        chol = torch.linalg.cholesky(XtX)
+        XtX_inv = torch.cholesky_inverse(chol)
+    except torch.linalg.LinAlgError:
+        # Fallback to pseudo-inverse
+        XtX_inv = torch.linalg.pinv(XtX)
+    if cov_type in ("hc0", "hc1", "hc2", "hc3"):
+        # Per-observation weights for the sandwich "meat".
+        e2 = torch.square(resid)
+        if cov_type == "hc1":
+            # HC1: finite-sample correction n / (n - k)
+            n, k = X_design.shape
+            df_scale = n / (n - k) if n > k else 1.0
+            e2 = e2 * df_scale
+        elif cov_type in ("hc2", "hc3"):
+            # Leverage h_ii = x_i' (X'X)^-1 x_i, taken row-wise from
+            # (X @ (X'X)^-1) * X. No second Cholesky is needed, so this
+            # also works when XtX_inv came from the pseudo-inverse fallback.
+            leverage = torch.sum(torch.matmul(X_design, XtX_inv) * X_design, dim=1)
+            leverage = torch.clamp(leverage, 0.0, 1.0 - 1e-12)
+            if cov_type == "hc2":
+                # HC2: e2 / (1 - h_ii)
+                e2 = e2 / (1.0 - leverage)
+            else:
+                # HC3: e2 / (1 - h_ii)^2
+                e2 = e2 / torch.square(1.0 - leverage)
+        # Sandwich: (X'X)^-1 @ (X' @ diag(e2) @ X) @ (X'X)^-1
+        Xw = X_design * e2[:, None]
+        meat = torch.matmul(X_design.T, Xw)
+        cov_params = torch.matmul(XtX_inv, torch.matmul(meat, XtX_inv))
+        bse_torch = torch.sqrt(torch.clamp(torch.diag(cov_params), min=0.0))
+    else:
+        # Classical covariance: scale * diag((X'X)^-1)
+        bse_torch = torch.sqrt(scale * torch.clamp(torch.diag(XtX_inv), min=0.0))
+    # t-statistics (epsilon avoids division by zero for zero standard errors)
+    tvalues_torch = params_torch / (bse_torch + 1e-30)
+    # p-values (two-tailed t-test), entirely on GPU
+    pvalues_torch = t_dist.two_sided_pvalue(tvalues_torch, df=df_resid)
+    # Confidence intervals (95%)
+    alpha = 0.05  # two-tailed significance level for 95% CI
+    t_crit = t_dist.two_sided_critical_value(alpha, df=df_resid)
+    margin = t_crit * bse_torch
+    conf_int_torch = torch.stack([params_torch - margin, params_torch + margin], dim=1)
+    return bse_torch, tvalues_torch, pvalues_torch, conf_int_torch
+def compute_r2_torch(y, resid):
+    """
+    Compute the coefficient of determination (R-squared) with Torch.
+    Evaluates ``1 - SS_res / SS_tot``. There is no guard for
+    ``SS_tot == 0`` (constant ``y``).
+    Parameters
+    ----------
+    y : torch.Tensor
+        True values on GPU.
+    resid : torch.Tensor
+        Residuals on GPU.
+    Returns
+    -------
+    r2 : float
+        R-squared value, copied back to the host.
+    """
+    torch = _import_torch()
+    deviations = y - torch.mean(y)
+    ss_tot = torch.sum(deviations ** 2)
+    ss_res = torch.sum(resid ** 2)
+    ratio = ss_res / ss_tot
+    return float((1 - ratio).cpu().numpy())
+def compute_aic_bic_torch(n, k, scale, device=None):
+    """
+    Compute AIC and BIC with Torch from the error variance.
+    Uses ``AIC = n * log(scale) + 2 * k`` and
+    ``BIC = n * log(scale) + k * log(n)``.
+    Parameters
+    ----------
+    n : int
+        Number of observations.
+    k : int
+        Number of parameters.
+    scale : float or torch.Tensor
+        Error variance (MLE estimate: RSS/n). Non-tensor values are
+        converted to a float64 tensor on ``device``.
+    device : str, optional
+        Torch device string; resolved through ``_get_torch_device`` when omitted.
+    Returns
+    -------
+    aic : float
+        AIC value.
+    bic : float
+        BIC value.
+    """
+    torch = _import_torch()
+    if device is None:
+        device = _get_torch_device()
+    tensor_options = {"dtype": torch.float64, "device": device}
+    if not isinstance(scale, torch.Tensor):
+        scale = torch.tensor(scale, **tensor_options)
+    n_tensor = torch.tensor(float(n), **tensor_options)
+    k_tensor = torch.tensor(float(k), **tensor_options)
+    fit_term = n_tensor * torch.log(scale)
+    aic_tensor = fit_term + 2 * k_tensor
+    bic_tensor = fit_term + k_tensor * torch.log(n_tensor)
+    return float(aic_tensor.cpu().numpy()), float(bic_tensor.cpu().numpy())
+def compute_f_stat_torch(y, resid, X_design, df_resid, device=None):
+    """
+    Compute F-statistic and p-value on Torch GPU.
+    Parameters
+    ----------
+    y : torch.Tensor
+        True values on GPU.
+    resid : torch.Tensor
+        Residuals on GPU.
+    X_design : torch.Tensor
+        Design matrix on GPU. One column is subtracted as the intercept
+        when counting regressors.
+    df_resid : int
+        Residual degrees of freedom.
+    device : str, optional
+        Torch device string.
+    Returns
+    -------
+    fvalue : float
+        F-statistic (``inf`` in the degenerate case below).
+    pvalue : float
+        p-value for F-statistic. ``(inf, 1.0)`` is returned when there are
+        no non-intercept regressors or the residual sum of squares is not
+        positive.
+    """
+    torch = _import_torch()
+    if device is None:
+        device = _get_torch_device()
+    from statgpu.inference._distributions_backend import get_distribution
+    f_dist = get_distribution("f", backend="torch", device=device)
+    y_mean = torch.mean(y)
+    ss_tot = torch.sum((y - y_mean) ** 2)
+    ss_res = torch.sum(resid ** 2)
+    ss_reg = ss_tot - ss_res
+    k = X_design.shape[1] - 1  # exclude intercept
+    if k == 0 or ss_res <= 0:
+        return float('inf'), 1.0
+    fvalue_tensor = (ss_reg / k) / (ss_res / df_resid)
+    fvalue = float(fvalue_tensor.cpu().numpy())
+    # p-value = 1 - CDF of F(d1, d2), evaluated by the backend distribution
+    d1 = float(k)
+    d2 = float(df_resid)
+    if d2 <= 0 or d1 <= 0:
+        pvalue = 1.0
+    else:
+        cdf = f_dist.cdf(fvalue, dfn=d1, dfd=d2)
+        pvalue = 1.0 - float(cdf.cpu().numpy())
+    return fvalue, pvalue
+def torch_memory_cleanup():
+    """
+    Release cached CUDA memory held by Torch, on a best-effort basis.
+    Does nothing when CUDA is unavailable; any error raised while emptying
+    the cache is swallowed.
+    """
+    torch = _import_torch()
+    if not torch.cuda.is_available():
+        return
+    try:
+        torch.cuda.empty_cache()
+    except Exception:
+        # Cleanup is optional; a failure here must not break the caller.
+        pass
+def is_torch_tensor(x):
+    """
+    Return True when ``x`` is a ``torch.Tensor`` instance.
+    Torch is imported through ``_import_torch``, so a RuntimeError is
+    raised if it is not installed.
+    """
+    tensor_type = _import_torch().Tensor
+    return isinstance(x, tensor_type)
+def to_numpy_from_torch(x):
+    """
+    Convert Torch tensor to NumPy array.
+    Handles tensors on any device. Non-tensor inputs are also accepted:
+    objects exposing ``.get()`` (e.g. CuPy arrays) are fetched through it,
+    everything else goes through ``np.asarray``.
+    Parameters
+    ----------
+    x : torch.Tensor or array-like
+        Value to convert.
+    Returns
+    -------
+    numpy.ndarray
+        Host array (for ``.get()`` objects, whatever ``.get()`` returns).
+    """
+    torch = _import_torch()
+    if isinstance(x, torch.Tensor):
+        # ``.cpu()`` returns the tensor itself when it already lives on the
+        # CPU, so one path covers CPU, CUDA and other accelerator devices.
+        return x.detach().cpu().numpy()
+    # Handle non-tensor inputs
+    if hasattr(x, 'get'):  # CuPy array
+        return x.get()
+    return np.asarray(x)
+def to_torch_from_numpy(x, device=None, dtype=None):
+    """
+    Convert array-like input to a Torch tensor on the requested device.
+    Parameters
+    ----------
+    x : array-like
+        Input data (NumPy, CuPy, list, or an existing Torch tensor).
+        Objects exposing ``.get()`` are fetched through it first.
+    device : str, optional
+        Target device ('cpu' or 'cuda'); resolved through
+        ``_get_torch_device`` when omitted.
+    dtype : torch.dtype, optional
+        Target dtype; the input dtype is kept when omitted.
+    Returns
+    -------
+    torch.Tensor
+    """
+    torch = _import_torch()
+    target = _get_torch_device() if device is None else device
+    # CuPy-style objects: pull the data to the host first
+    if hasattr(x, 'get'):
+        x = x.get()
+    if not isinstance(x, torch.Tensor):
+        # Everything else goes through NumPy
+        converted = torch.from_numpy(np.asarray(x)).to(target)
+        return converted if dtype is None else converted.to(dtype)
+    # Existing tensors are only moved/cast when they differ from the target
+    if x.device.type != target:
+        x = x.to(target)
+    if dtype is not None and x.dtype != dtype:
+        x = x.to(dtype)
+    return x