PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/nonparametric/kernel_smoothing/_kernel_common.py ADDED Viewed

@@ -0,0 +1,348 @@
+"""Shared utilities for kernel-based nonparametric estimators."""
+from __future__ import annotations
+import math
+from typing import Any, Union
+import numpy as np
+from statgpu.backends import (
+    _get_torch_device_str,
+    _get_xp,
+    _resolve_backend,
+    _to_float_scalar,
+    _to_numpy,
+    _torch_dev,
+    xp_arange,
+    xp_asarray,
+    xp_astype,
+    xp_empty,
+    xp_eye,
+    xp_full,
+    xp_maximum,
+    xp_ones,
+    xp_zeros,
+)
+# Names exported by ``from <this module> import *``.
+# The list mixes helpers defined in this module with four helpers imported
+# above from ``statgpu.backends`` (``_get_xp``, ``_resolve_backend``,
+# ``_to_float_scalar``, ``_to_numpy``); the latter are re-exported for
+# backward compatibility.
+__all__ = [
+    "_auto_backend_from_device",
+    "_as_points_2d",
+    "_as_samples_2d",
+    "_bandwidth_factor",
+    "_bandwidth_factor_1d_nrd",
+    "_effective_sample_size",
+    "_get_xp",
+    "_kernel_values_from_quad",
+    "_normalize_kernel_name",
+    "_normalize_regression_name",
+    "_normalize_weights",
+    "_resolve_backend",
+    "_stable_inv_and_det",
+    "_to_float_scalar",
+    "_to_numpy",
+    "_weighted_covariance",
+]
+def _torch_device_from_data(data) -> str:
+    """Return the device of a torch tensor as a string, or ``"cpu"`` otherwise.
+
+    Torch is imported lazily. When the import fails, an attribute lookup
+    fails, or ``data`` is not a ``torch.Tensor``, ``"cpu"`` is returned.
+    """
+    fallback = "cpu"
+    try:
+        import torch
+        is_tensor = isinstance(data, torch.Tensor)
+        return str(data.device) if is_tensor else fallback
+    except (ImportError, AttributeError):
+        # Torch is missing or an attribute could not be resolved: report "cpu".
+        return fallback
+def _cupy_gpu_available() -> bool:
+    """Return True when CuPy imports and reports at least one CUDA device."""
+    try:
+        import cupy as cp
+        return int(cp.cuda.runtime.getDeviceCount()) > 0
+    except Exception:
+        # Import failures, missing drivers and CUDA runtime errors all mean
+        # "not usable"; the concrete exception type depends on the install.
+        return False
+def _torch_cuda_available() -> bool:
+    """Return True when Torch imports and ``torch.cuda.is_available()`` is truthy."""
+    try:
+        import torch
+        return bool(torch.cuda.is_available())
+    except Exception:
+        # Same rationale as for CuPy: any failure means "not usable".
+        return False
+def _auto_backend_from_device(device: str, prefer_torch: bool = False) -> str:
+    """Map a user-facing device string to a backend name.
+
+    Parameters
+    ----------
+    device
+        Compared after ``str(...).strip().lower()``. ``"numpy"``/``"cpu"``
+        map to ``"numpy"`` and ``"torch"`` maps to ``"torch"`` without any
+        probing. ``"cuda"``/``"gpu"`` require a working GPU backend. Every
+        other value is treated as automatic selection.
+    prefer_torch
+        Only consulted for ``"cuda"``/``"gpu"``: when true, Torch is probed
+        before CuPy; otherwise CuPy is probed first.
+
+    Returns
+    -------
+    str
+        One of ``"numpy"``, ``"cupy"`` or ``"torch"``.
+
+    Raises
+    ------
+    RuntimeError
+        If ``"cuda"``/``"gpu"`` was requested and neither CuPy nor Torch
+        CUDA is usable.
+    """
+    d = str(device).strip().lower()
+    if d in ("numpy", "cpu"):
+        return "numpy"
+    if d == "torch":
+        return "torch"
+    probes = {"cupy": _cupy_gpu_available, "torch": _torch_cuda_available}
+    if d in ("cuda", "gpu"):
+        order = ("torch", "cupy") if prefer_torch else ("cupy", "torch")
+        for backend in order:
+            if probes[backend]():
+                return backend
+        # Raised outside any ``except`` block so the error is not chained to
+        # an unrelated import/probe exception.
+        raise RuntimeError(
+            f"No GPU backend (CuPy or Torch CUDA) is available for "
+            f"device='{device}'. Use device='auto' to fall back to CPU."
+        )
+    # Automatic selection: prefer CuPy, then Torch, then NumPy.
+    for backend in ("cupy", "torch"):
+        if probes[backend]():
+            return backend
+    return "numpy"
+def _normalize_kernel_name(kernel: str) -> str:
+    """Resolve a kernel name or alias to its canonical spelling.
+
+    The input is stringified, stripped and lower-cased before lookup.
+
+    Raises
+    ------
+    ValueError
+        If the name is neither a canonical kernel name nor a known alias.
+    """
+    # (canonical name, extra spellings accepted for it)
+    families = (
+        ("gaussian", ("normal",)),
+        ("rectangular", ("uniform", "box")),
+        ("triangular", ()),
+        ("epanechnikov", ("epa",)),
+        ("biweight", ("quartic",)),
+        ("triweight", ()),
+        ("cosine", ()),
+        ("optcosine", ()),
+    )
+    lookup = {}
+    for canonical, synonyms in families:
+        lookup[canonical] = canonical
+        for synonym in synonyms:
+            lookup[synonym] = canonical
+    key = str(kernel).strip().lower()
+    if key not in lookup:
+        raise ValueError(
+            "kernel must be one of: 'gaussian', 'rectangular', 'triangular', "
+            "'epanechnikov', 'biweight', 'triweight', 'cosine', 'optcosine'"
+        )
+    return lookup[key]
+def _normalize_regression_name(regression: str) -> str:
+    """Resolve a regression-type name or alias to ``"nw"`` or ``"local_linear"``.
+
+    The input is stringified, stripped and lower-cased before matching.
+    Hyphenated spellings (``"nadaraya-watson"``, ``"local-linear"``) are
+    accepted as well.
+
+    Raises
+    ------
+    ValueError
+        If the name matches none of the accepted spellings.
+    """
+    key = str(regression).strip().lower()
+    if key in ("nw", "nadaraya_watson", "nadaraya-watson"):
+        return "nw"
+    if key in ("local_linear", "local-linear", "ll"):
+        return "local_linear"
+    raise ValueError(
+        "regression must be one of: 'nw', 'nadaraya_watson', 'local_linear', 'll'"
+    )
+def _kernel_values_from_quad(quad, kernel_name: str, xp):
+    """Evaluate an (unnormalised) kernel profile on ``quad``.
+
+    ``quad`` is used as a squared scaled distance: the Gaussian branch
+    returns ``exp(-0.5 * quad)`` and the radial kernels use ``sqrt(quad)``
+    as the radius. All non-Gaussian kernels are zero where ``quad > 1``.
+    ``kernel_name`` must already be canonical (no alias resolution happens
+    here); ``xp`` is the array namespace used for the element-wise math.
+
+    Raises
+    ------
+    ValueError
+        If ``kernel_name`` is not one of the supported canonical names.
+    """
+    if kernel_name == "gaussian":
+        return xp.exp(-0.5 * quad)
+    inside = quad <= 1.0
+    if kernel_name == "rectangular":
+        return xp_astype(inside, xp.float64, xp)
+    if kernel_name in ("triangular", "cosine", "optcosine"):
+        # Clamp before the square root so tiny negative round-off is harmless.
+        radius = xp.sqrt(xp_maximum(quad, 0.0, xp))
+        if kernel_name == "triangular":
+            return xp_maximum(1.0 - radius, 0.0, xp)
+        if kernel_name == "cosine":
+            profile = 0.5 * (1.0 + xp.cos(math.pi * radius))
+        else:
+            profile = xp.cos(0.5 * math.pi * radius)
+        return xp.where(inside, profile, 0.0)
+    # Polynomial family: (1 - quad)_+ raised to the power 1, 2 or 3.
+    powers = {"epanechnikov": 1, "biweight": 2, "triweight": 3}
+    if kernel_name in powers:
+        base = xp_maximum(1.0 - quad, 0.0, xp)
+        values = base
+        for _ in range(powers[kernel_name] - 1):
+            values = values * base
+        return values
+    raise ValueError(f"Unsupported kernel: {kernel_name}")
+def _as_samples_2d(samples, xp, ref_arr=None):
+    """Convert ``samples`` to a 2-D float64 array with one row per observation.
+
+    A 1-D input becomes a single column. ``ref_arr`` is forwarded unchanged
+    to ``xp_asarray``.
+
+    Raises
+    ------
+    ValueError
+        If the converted array is not 1-D or 2-D, or has fewer than two rows.
+    """
+    data = xp_asarray(samples, dtype=xp.float64, xp=xp, ref_arr=ref_arr)
+    rank = data.ndim
+    if rank not in (1, 2):
+        raise ValueError("samples must be 1D or 2D")
+    if rank == 1:
+        data = data.reshape(-1, 1)
+    if int(data.shape[0]) < 2:
+        raise ValueError("samples must contain at least 2 observations")
+    return data
+def _as_points_2d(points, n_features: int, xp, ref_arr=None):
+    """Convert evaluation ``points`` to a 2-D float64 array of width ``n_features``.
+
+    A 1-D input is interpreted as many one-dimensional points when
+    ``n_features == 1``; otherwise it must hold exactly ``n_features``
+    values and is treated as a single point. ``ref_arr`` is forwarded
+    unchanged to ``xp_asarray``.
+
+    Raises
+    ------
+    ValueError
+        If the array is not 1-D/2-D, or its feature dimension does not
+        match ``n_features``.
+    """
+    arr = xp_asarray(points, dtype=xp.float64, xp=xp, ref_arr=ref_arr)
+    if arr.ndim == 1:
+        # Use ``shape[0]`` rather than ``.size``: on a torch tensor ``.size``
+        # is a method, so ``int(arr.size)`` would raise TypeError. For a 1-D
+        # array the two are the same number.
+        n_values = int(arr.shape[0])
+        if n_features == 1:
+            arr = arr.reshape(-1, 1)
+        elif n_values == n_features:
+            arr = arr.reshape(1, n_features)
+        else:
+            raise ValueError("points shape is incompatible with sample dimensionality")
+    elif arr.ndim != 2:
+        raise ValueError("points must be 1D or 2D")
+    if int(arr.shape[1]) != int(n_features):
+        raise ValueError("points feature dimension does not match samples")
+    return arr
+def _normalize_weights(weights, n_samples: int, xp, device: str = "cpu", ref_arr=None):
+    """Return non-negative sample weights rescaled to sum to one.
+
+    Parameters
+    ----------
+    weights
+        ``None`` for uniform weights ``1 / n_samples``, or an array-like
+        that is flattened to 1-D and must have ``n_samples`` entries.
+    n_samples
+        Expected number of weights.
+    xp
+        Array namespace used for the reductions.
+    device
+        Not read by this function; kept so existing callers keep working.
+    ref_arr
+        Forwarded unchanged to ``xp_full`` / ``xp_asarray``.
+
+    Raises
+    ------
+    ValueError
+        If the length is wrong, any weight is negative, the weights are not
+        finite, or they do not sum to a positive value.
+    """
+    if weights is None:
+        fill_val = 1.0 / float(n_samples)
+        return xp_full(n_samples, fill_val, xp.float64, xp, ref_arr=ref_arr)
+    w = xp_asarray(weights, dtype=xp.float64, xp=xp, ref_arr=ref_arr).reshape(-1)
+    # ``shape[0]`` instead of ``.size``: ``.size`` is a method on torch
+    # tensors, so ``int(w.size)`` would raise TypeError there.
+    if int(w.shape[0]) != int(n_samples):
+        raise ValueError("weights must have the same length as samples")
+    if _to_float_scalar(xp.min(w)) < 0.0:
+        raise ValueError("weights must be non-negative")
+    w_sum = xp.sum(w)
+    total = _to_float_scalar(w_sum)
+    # NaN/inf weights compare False against the checks above and below, and
+    # would otherwise yield NaN weights silently.
+    if not np.isfinite(total):
+        raise ValueError("weights must be finite")
+    if total <= 0.0:
+        raise ValueError("weights must sum to a positive value")
+    return w / w_sum
+def _effective_sample_size(weights, xp) -> float:
+    """Return ``1 / sum(weights ** 2)``.
+
+    NOTE(review): this equals the usual effective sample size only when the
+    weights already sum to one -- confirm against callers.
+
+    Raises
+    ------
+    ValueError
+        If the sum of squared weights is not positive.
+    """
+    sum_of_squares = _to_float_scalar(xp.sum(weights * weights))
+    if sum_of_squares <= 0.0:
+        raise ValueError("invalid weights: effective sample size denominator is non-positive")
+    return 1.0 / sum_of_squares
+def _bandwidth_factor_1d_nrd(
+    method: str,
+    *,
+    n_eff: float,
+    samples_2d,
+    data_cov,
+    xp,
+) -> float:
+    """Bandwidth factor for the 1-D ``'nrd0'`` / ``'nrd'`` rules.
+
+    The absolute bandwidth is ``c * scale * n_eff ** (-1/5)`` with
+    ``c = 0.9`` for ``'nrd0'`` and ``c = 1.06`` for ``'nrd'``. ``scale`` is
+    the smaller of the sample standard deviation (``ddof=1``) and
+    ``IQR / 1.34`` of the finite values in the first column of
+    ``samples_2d``, falling back to the standard deviation when the IQR
+    term is unusable. The result is that bandwidth divided by
+    ``sqrt(data_cov[0, 0])``. The rule names presumably follow R's
+    ``bw.nrd0`` / ``bw.nrd`` -- confirm. ``xp`` is accepted but not read.
+
+    Raises
+    ------
+    ValueError
+        For an unknown method, fewer than two finite samples, or when any
+        intermediate scale, bandwidth or factor is not finite and positive.
+    """
+    def _unusable(value) -> bool:
+        # True for NaN, +/-inf, zero and negative values.
+        return (not np.isfinite(value)) or value <= 0.0
+    rule = str(method).strip().lower()
+    if rule not in ("nrd0", "nrd"):
+        raise ValueError("method must be one of: 'nrd0', 'nrd'")
+    column = np.asarray(_to_numpy(samples_2d[:, 0]), dtype=np.float64)
+    finite = column[np.isfinite(column)]
+    if finite.size < 2:
+        raise ValueError("need at least 2 finite samples for 'nrd0'/'nrd' bandwidth")
+    sample_sd = float(np.std(finite, ddof=1))
+    upper_q, lower_q = np.quantile(finite, [0.75, 0.25])
+    iqr_scale = float((upper_q - lower_q) / 1.34)
+    spread = sample_sd
+    if np.isfinite(iqr_scale) and iqr_scale > 0.0:
+        spread = min(sample_sd, iqr_scale)
+    if _unusable(spread):
+        # Last resort: population standard deviation.
+        spread = float(np.std(finite, ddof=0))
+    if _unusable(spread):
+        raise ValueError("unable to compute positive scale for 'nrd0'/'nrd' bandwidth")
+    multiplier = {"nrd0": 0.9, "nrd": 1.06}[rule]
+    bw_abs = float(multiplier * spread * (float(n_eff) ** (-1.0 / 5.0)))
+    if _unusable(bw_abs):
+        raise ValueError("automatic bandwidth rule produced a non-positive value")
+    data_sd = math.sqrt(max(_to_float_scalar(data_cov[0, 0]), 0.0))
+    if _unusable(data_sd):
+        # Degenerate covariance entry: fall back to the sample deviation,
+        # floored at the smallest positive normal float.
+        data_sd = max(float(np.finfo(np.float64).tiny), sample_sd)
+    ratio = float(bw_abs / data_sd)
+    if _unusable(ratio):
+        raise ValueError("bandwidth factor must be a finite positive scalar")
+    return ratio
+def _bandwidth_factor(
+    bandwidth: Union[str, float, int],
+    *,
+    n_eff: float,
+    n_features: int,
+) -> float:
+    """Return the scalar bandwidth factor for a rule name or a numeric value.
+
+    Parameters
+    ----------
+    bandwidth
+        ``"scott"`` gives ``n_eff ** (-1 / (d + 4))`` and ``"silverman"``
+        gives ``(n_eff * (d + 2) / 4) ** (-1 / (d + 4))`` with
+        ``d = n_features``. Any non-string value is converted with
+        ``float()`` and used as the factor directly. Other rule names listed
+        in the error message are not handled here.
+    n_eff
+        Effective sample size; only used (and validated) for rule names.
+    n_features
+        Number of features ``d``.
+
+    Raises
+    ------
+    ValueError
+        For an unsupported rule name, a non-positive or non-finite
+        ``n_eff`` when a rule is requested, or a resulting factor that is
+        not finite and positive.
+    """
+    if isinstance(bandwidth, str):
+        method = bandwidth.strip().lower()
+        if method not in ("scott", "silverman"):
+            raise ValueError(
+                "bandwidth must be one of: 'scott', 'silverman', 'nrd0', 'nrd', "
+                "'ucv', 'bcv', 'sj', 'sj-ste', 'sj-dpi', 'cv', 'cv_ls', 'cv-nw', 'cv-ll', "
+                "or a positive scalar"
+            )
+        n_eff_f = float(n_eff)
+        # Without this guard a zero n_eff raises ZeroDivisionError and a
+        # negative one produces a complex power that later fails with
+        # TypeError; report both as a clear ValueError instead.
+        if not np.isfinite(n_eff_f) or n_eff_f <= 0.0:
+            raise ValueError("n_eff must be a finite positive scalar")
+        exponent = -1.0 / (n_features + 4.0)
+        if method == "scott":
+            factor = n_eff_f ** exponent
+        else:
+            factor = (n_eff_f * (n_features + 2.0) / 4.0) ** exponent
+    else:
+        factor = float(bandwidth)
+    if not np.isfinite(factor) or factor <= 0.0:
+        raise ValueError("bandwidth factor must be a finite positive scalar")
+    return float(factor)
+def _weighted_covariance(samples_2d, weights_1d, xp):
+    """Weighted covariance of the rows of ``samples_2d`` with a diagonal jitter.
+
+    The weighted scatter matrix is divided by ``1 - sum(w ** 2)``, then
+    symmetrised, and ``max(1e-12 * mean_diagonal, 1e-12)`` is added to the
+    diagonal. NOTE(review): the ``1 - sum(w ** 2)`` correction presupposes
+    weights that sum to one -- confirm against callers.
+
+    Raises
+    ------
+    ValueError
+        If ``1 - sum(w ** 2)`` is at most ``1e-15``.
+    """
+    dim = int(samples_2d.shape[1])
+    weighted_mean = xp.sum(samples_2d * weights_1d[:, None], axis=0)
+    deviations = samples_2d - weighted_mean
+    correction = 1.0 - xp.sum(weights_1d * weights_1d)
+    if _to_float_scalar(correction) <= 1e-15:
+        raise ValueError("effective degrees of freedom is too small for covariance estimation")
+    scatter = (deviations.T * weights_1d[None, :]) @ deviations
+    cov = scatter / correction
+    # Remove round-off asymmetry before adding the ridge.
+    cov = 0.5 * (cov + cov.T)
+    diag_sum = _to_float_scalar(xp.trace(cov))
+    if np.isfinite(diag_sum):
+        level = diag_sum / float(max(1, dim))
+    else:
+        level = 1.0
+    ridge = max(level * 1e-12, 1e-12)
+    return cov + ridge * xp_eye(dim, xp.float64, xp, ref_arr=cov)
+def _stable_inv_and_det(cov, xp):
+    """Invert ``cov`` and compute its determinant, adding jitter until it works.
+
+    Up to eight attempts are made. After each failed attempt (an exception,
+    or a determinant that is not finite and positive) a multiple of the
+    identity is added to the working matrix and the multiple grows tenfold.
+
+    Returns
+    -------
+    tuple
+        ``(inverse, determinant, matrix_actually_inverted)``.
+
+    Raises
+    ------
+    ValueError
+        If no attempt succeeds. The error is chained to the most recent
+        exception when at least one attempt raised.
+    """
+    dim = int(cov.shape[0])
+    work = xp_astype(cov, xp.float64, xp)
+    diag_sum = _to_float_scalar(xp.trace(work))
+    level = diag_sum / float(max(1, dim)) if np.isfinite(diag_sum) else 1.0
+    ridge = max(level * 1e-12, 1e-12)
+    failure = None
+    attempts_left = 8
+    while attempts_left > 0:
+        attempts_left -= 1
+        try:
+            inverse = xp.linalg.inv(work)
+            determinant = _to_float_scalar(xp.linalg.det(work))
+            if np.isfinite(determinant) and determinant > 0.0:
+                return inverse, determinant, work
+        except Exception as exc:
+            # Backends raise different error types for singular input, so
+            # the catch is deliberately broad; remember it for chaining.
+            failure = exc
+        work = work + ridge * xp_eye(dim, xp.float64, xp, ref_arr=work)
+        ridge *= 10.0
+    if failure is None:
+        raise ValueError("covariance matrix is not positive definite")
+    raise ValueError("covariance inversion failed") from failure