statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""MCP-penalized regression (Zhang, Annals of Statistics 2010)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Union
|
|
6
|
+
|
|
7
|
+
from statgpu._config import Device
|
|
8
|
+
from statgpu.linear_model.penalized._penalized_linear import PenalizedLinearRegression
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MCPRegression(PenalizedLinearRegression):
    """Linear regression with the minimax concave penalty (MCP).

    MCP is a non-convex penalty with the oracle property. Optimization
    uses LLA+FISTA.

    Parameters
    ----------
    alpha : float, default=1.0
        Regularization strength.
    gamma : float, default=3.0
        Concavity parameter (Zhang recommends gamma > 1).
    fit_intercept : bool, default=True
        Whether to calculate the intercept.
    max_iter : int, default=1000
        Maximum number of iterations.
    tol : float, default=1e-4
        Tolerance for convergence.
    device : str or Device, default='auto'
        Computation device.
    compute_inference : bool, default=False
        Whether to compute post-fit inference (MCP does not support debiased).
    solver : str, default="auto"
        Forwarded unchanged to ``PenalizedLinearRegression``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``PenalizedLinearRegression``.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        gamma: float = 3.0,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        compute_inference: bool = False,
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # The concavity parameter is stored on the instance before the
        # base-class constructor runs, and is also handed to the penalty
        # through ``penalty_kwargs``.
        self.gamma = gamma

        base_options = {
            "penalty": "mcp",
            "alpha": alpha,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "compute_inference": compute_inference,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "penalty_kwargs": {"gamma": gamma},
        }
        super().__init__(**base_options)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Negative Binomial regression (GLM, log link, fixed dispersion)."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from statgpu._config import Device
|
|
8
|
+
from statgpu.glm_core._family import NegativeBinomial
|
|
9
|
+
from statgpu.linear_model._glm_base import GeneralizedLinearModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NegativeBinomialRegression(GeneralizedLinearModel):
    """Negative Binomial GLM for overdispersed count data.

    Parameters
    ----------
    alpha : float, default=1.0
        Dispersion parameter. Var(Y) = mu + alpha * mu^2.
        Must be finite and strictly positive.
    fit_intercept : bool, default=True
        Forwarded unchanged to ``GeneralizedLinearModel``.
    max_iter : int, default=100
        Forwarded unchanged to ``GeneralizedLinearModel``.
    tol : float, default=1e-4
        Forwarded unchanged to ``GeneralizedLinearModel``.
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
        Forwarded unchanged to ``GeneralizedLinearModel``.
    n_jobs : int or None, default=None
        Forwarded unchanged to ``GeneralizedLinearModel``.
    solver : str, default="auto"
        Forwarded unchanged to ``GeneralizedLinearModel``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``GeneralizedLinearModel``.

    Raises
    ------
    ValueError
        If ``alpha`` is not finite or is not strictly positive.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: Optional[int] = None,
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # Two guards, evaluated in the same order as a short-circuit ``or``:
        # finiteness first, then sign.
        bad_alpha = "alpha must be a finite positive scalar for negative binomial regression"
        if not np.isfinite(alpha):
            raise ValueError(bad_alpha)
        if alpha <= 0.0:
            raise ValueError(bad_alpha)

        # Stored privately; read back by the family / loss hooks below.
        self._alpha = alpha

        glm_options = {
            "family": "negative_binomial",
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "C": C,
            "device": device,
            "n_jobs": n_jobs,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
        }
        super().__init__(**glm_options)

    def _get_family(self):
        """Return a ``NegativeBinomial`` family built with the stored dispersion."""
        dispersion = self._alpha
        return NegativeBinomial(alpha=dispersion)

    def _get_loss_kwargs(self):
        """Return the loss keyword arguments: the stored dispersion under ``"alpha"``."""
        loss_kwargs = {}
        loss_kwargs["alpha"] = self._alpha
        return loss_kwargs
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Poisson regression (GLM version, no inference)."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from statgpu._config import Device
|
|
5
|
+
from statgpu.glm_core._family import Poisson
|
|
6
|
+
from statgpu.linear_model._glm_base import GeneralizedLinearModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PoissonRegression(GeneralizedLinearModel):
    """Poisson GLM with GPU support.

    Fitting uses IRLS. No inference/summary is provided (use statgpu's
    existing GLM for inference).

    Parameters
    ----------
    fit_intercept : bool, default=True
        Forwarded unchanged to ``GeneralizedLinearModel``.
    max_iter : int, default=100
        Forwarded unchanged to ``GeneralizedLinearModel``.
    tol : float, default=1e-4
        Forwarded unchanged to ``GeneralizedLinearModel``.
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
        Forwarded unchanged to ``GeneralizedLinearModel``.
    n_jobs : int or None, default=None
        Forwarded unchanged to ``GeneralizedLinearModel``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``GeneralizedLinearModel``.
    """

    def __init__(
        self,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: Optional[int] = None,
        gpu_memory_cleanup: bool = False,
    ):
        # The solver is not user-configurable here: it is always "auto".
        glm_options = {
            "family": "poisson",
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "C": C,
            "device": device,
            "n_jobs": n_jobs,
            "solver": "auto",
            "gpu_memory_cleanup": gpu_memory_cleanup,
        }
        super().__init__(**glm_options)

    def _get_family(self):
        """Return a fresh ``Poisson`` family instance."""
        family = Poisson()
        return family
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Ridge regression (L2 penalty) via PenalizedLinearRegression.
|
|
3
|
+
|
|
4
|
+
The V9 ``Ridge`` class is a thin wrapper over ``PenalizedLinearRegression``
|
|
5
|
+
with ``penalty="l2"`` and ``solver="exact"``.
|
|
6
|
+
|
|
7
|
+
The legacy standalone implementation has been moved to ``_ridge_legacy.py``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
__all__ = ["Ridge"]
|
|
13
|
+
|
|
14
|
+
from typing import Optional, Union
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from statgpu._config import Device
|
|
19
|
+
|
|
20
|
+
from statgpu.linear_model.penalized._penalized_linear import PenalizedLinearRegression as _PenalizedLinearRegression
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Ridge(_PenalizedLinearRegression):
    """Thin sklearn-style wrapper over ``PenalizedLinearRegression`` with L2 penalty.

    The constructor fixes ``penalty="l2"`` and forwards every other argument
    to the base class. ``fit`` adds a CPU-only closed-form path that is used
    when no formula is given, the compute device is the CPU and
    ``solver == "exact"``; every other configuration is delegated to the
    base-class ``fit``.

    Parameters
    ----------
    alpha : float, default=1.0
        Regularization strength. On the closed-form path the penalty added
        to the Gram matrix is ``alpha * n_eff``.
    fit_intercept : bool, default=True
        Whether an intercept is estimated.
    device : str or Device, default=Device.AUTO
        Forwarded to the base class.
    n_jobs : int or None, default=None
        Forwarded to the base class.
    gpu_memory_cleanup : bool, default=False
        Forwarded to the base class.
    compute_inference : bool, default=True
        When false, the closed-form path skips building the design matrix,
        residuals, scale and inference statistics.
    cov_type : str, default="nonrobust"
        Stored lowercased on the instance; the value as given is forwarded
        to the base class.
    hac_maxlags : int or None, default=None
        Stored on the instance and forwarded to the base class.
    max_iter : int, default=1000
        Forwarded to the base class.
    tol : float, default=1e-4
        Forwarded to the base class.
    solver : str, default="exact"
        Forwarded to the base class; any value other than ``"exact"``
        disables the closed-form path in ``fit``.
    cpu_solver : str, default="fista"
        Forwarded to the base class.
    lipschitz_L : float or None, default=None
        Forwarded to the base class.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        fit_intercept: bool = True,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = True,
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        max_iter: int = 1000,
        tol: float = 1e-4,
        solver: str = "exact",
        cpu_solver: str = "fista",
        lipschitz_L: Optional[float] = None,
    ):
        _ct = str(cov_type).lower()
        # Keeps the caller's own object when it already equals its lowercase
        # form, otherwise stores the lowercased string.
        # NOTE(review): presumably this preserves object identity for
        # sklearn-style cloning -- confirm before simplifying.
        self.cov_type = cov_type if cov_type == _ct else _ct
        self.hac_maxlags = hac_maxlags
        super().__init__(
            penalty="l2",
            alpha=alpha,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            device=device,
            n_jobs=n_jobs,
            gpu_memory_cleanup=gpu_memory_cleanup,
            compute_inference=compute_inference,
            cov_type=cov_type,
            hac_maxlags=hac_maxlags,
            solver=solver,
            cpu_solver=cpu_solver,
            lipschitz_L=lipschitz_L,
        )

    def fit(self, X=None, y=None, sample_weight=None, formula=None, data=None):
        """Fit Ridge regression model with optimized memory-efficient path.

        The closed-form path solves ``(G + alpha * n_eff * I) coef = r``,
        where ``G`` and ``r`` are the (weighted) Gram matrix and right-hand
        side. Centering for the intercept is applied to ``G`` and ``r``
        directly, so the centered design matrix is never allocated.
        Inference quantities are only computed when
        ``compute_inference`` is true.

        Parameters
        ----------
        X : array-like
            Design matrix; must be two-dimensional on the closed-form path.
        y : array-like
            Response values.
        sample_weight : array-like or None, default=None
            Optional per-observation weights, one per row of ``X``.
        formula, data : optional
            Passed to the base-class ``fit`` when ``formula`` is given.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, or ``sample_weight`` does not
            hold exactly one weight per row of ``X``.
        """
        if (
            formula is not None
            or self._get_compute_device() != Device.CPU
            or self.solver != "exact"
        ):
            # Fall back to parent for formula, GPU, or non-exact solver
            return super().fit(X=X, y=y, sample_weight=sample_weight, formula=formula, data=data)

        X_np = np.asarray(self._to_array(X, Device.CPU), dtype=np.float64)
        y_np = np.asarray(self._to_array(y, Device.CPU), dtype=np.float64)

        if X_np.ndim != 2:
            # Previously surfaced as an opaque tuple-unpacking ValueError.
            raise ValueError(
                f"X must be a 2-D array of shape (n_samples, n_features), got ndim={X_np.ndim}"
            )
        n_samples, n_features = X_np.shape
        self._nobs = n_samples
        self._fitted = False

        sw = None
        if sample_weight is not None:
            sw = np.asarray(sample_weight, dtype=np.float64).ravel()
            if sw.shape[0] != n_samples:
                # A length-1 weight would otherwise broadcast silently when
                # fit_intercept is False and leave n_eff equal to that
                # single weight instead of the total weight.
                raise ValueError(
                    f"sample_weight must have one entry per sample "
                    f"(expected {n_samples}, got {sw.shape[0]})"
                )

        # Effective sample size: total weight, or the row count if unweighted.
        n_eff = float(n_samples) if sw is None else float(sw.sum())

        # Centering vectors are only needed when an intercept is fitted.
        if self.fit_intercept:
            if sw is not None:
                X_wmean = np.average(X_np, axis=0, weights=sw)
                y_wmean = float(np.average(y_np, weights=sw))
            else:
                X_wmean = np.mean(X_np, axis=0)
                y_wmean = np.mean(y_np)

        # Build Gram matrix and RHS.
        # Weighted: X'WX, X'Wy. Unweighted: X'X, X'y.
        if sw is not None:
            # Form the row-scaled copy once and reuse it for both products.
            X_weighted = X_np * sw[:, None]
            XtX = X_weighted.T @ X_np
            Xty = X_weighted.T @ y_np
            del X_weighted  # release the n x p temporary before the solve
        else:
            XtX = X_np.T @ X_np
            Xty = X_np.T @ y_np

        # Centering for intercept: subtract the (weighted) outer product
        # instead of materializing a centered copy of X.
        if self.fit_intercept:
            XtX -= n_eff * np.outer(X_wmean, X_wmean)
            Xty -= n_eff * X_wmean * y_wmean

        if Xty.ndim == 0:
            Xty = Xty.reshape(1)
        if Xty.ndim == 1:
            Xty = Xty.reshape(-1, 1)

        # Solve (XtX + n_eff*alpha*I) @ coef = Xty.
        # The original comment states that the n_eff scaling matches the
        # exact ridge path of PenalizedGeneralizedLinearModel.
        # NOTE(review): it also cited the sklearn Ridge convention;
        # sklearn's documented Ridge objective uses alpha unscaled -- confirm.
        A = XtX + float(self.alpha) * n_eff * np.eye(n_features, dtype=np.float64)
        try:
            coef = np.linalg.solve(A, Xty).flatten()
        except np.linalg.LinAlgError:
            # Singular system: fall back to a least-squares solution.
            coef = np.linalg.lstsq(A, Xty, rcond=None)[0].flatten()

        if self.fit_intercept:
            self.intercept_ = float(y_wmean - X_wmean @ coef)
            self.coef_ = coef
            self._params = np.concatenate([[self.intercept_], self.coef_])
        else:
            self.intercept_ = 0.0
            self.coef_ = coef
            self._params = self.coef_.copy()

        self._X_design = None
        self._resid = None
        self._scale = np.nan
        self.n_iter_ = 1
        self._df_resid = n_samples - (n_features + (1 if self.fit_intercept else 0))

        # Build design matrix and compute residuals only when inference is needed
        if self.compute_inference:
            if self.fit_intercept:
                self._X_design = np.column_stack([np.ones(n_samples, dtype=X_np.dtype), X_np])
            else:
                self._X_design = X_np.copy()
            y_pred = self._X_design @ self._params
            self._resid = y_np - y_pred
            if self._df_resid > 0:
                resid_sq = self._resid ** 2
                self._scale = float(np.sum(resid_sq)) / self._df_resid
            # Compute inference statistics (bse, tvalues, pvalues, conf_int).
            # Per the original comment, for weighted fits
            # _compute_post_fit_gaussian_inference uses sqrt(w)*X internally,
            # producing the weighted scale and consistent inference attributes.
            # NOTE(review): nesting reconstructed from a listing without
            # indentation -- confirm this call sits directly under
            # `if self.compute_inference`.
            self._compute_post_fit_gaussian_inference(X_np, y_np, sample_weight=sample_weight)

        self._fitted = True
        return self
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""SCAD-penalized regression (Fan & Li, JASA 2001)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Union
|
|
6
|
+
|
|
7
|
+
from statgpu._config import Device
|
|
8
|
+
from statgpu.linear_model.penalized._penalized_linear import PenalizedLinearRegression
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SCADRegression(PenalizedLinearRegression):
    """Linear regression with the SCAD penalty.

    SCAD is a non-convex penalty with the oracle property. Optimization
    uses LLA+FISTA.

    Parameters
    ----------
    alpha : float, default=1.0
        Regularization strength.
    a : float, default=3.7
        Concavity parameter (Fan & Li recommend 3.7).
    fit_intercept : bool, default=True
        Whether to calculate the intercept.
    max_iter : int, default=1000
        Maximum number of iterations.
    tol : float, default=1e-4
        Tolerance for convergence.
    device : str or Device, default='auto'
        Computation device.
    compute_inference : bool, default=False
        Whether to compute post-fit inference (SCAD does not support debiased).
    solver : str, default="auto"
        Forwarded unchanged to ``PenalizedLinearRegression``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``PenalizedLinearRegression``.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        a: float = 3.7,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        compute_inference: bool = False,
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # The concavity parameter is stored on the instance before the
        # base-class constructor runs, and is also handed to the penalty
        # through ``penalty_kwargs``.
        self.a = a

        base_options = {
            "penalty": "scad",
            "alpha": alpha,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "compute_inference": compute_inference,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "penalty_kwargs": {"a": a},
        }
        super().__init__(**base_options)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Tweedie regression (GLM, log link, power parameter)."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from statgpu._config import Device
|
|
6
|
+
from statgpu.glm_core._family import Tweedie
|
|
7
|
+
from statgpu.linear_model._glm_base import GeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TweedieRegression(GeneralizedLinearModel):
    """Tweedie GLM for compound Poisson-Gamma outcomes.

    Parameters
    ----------
    power : float, default=1.5
        Tweedie power parameter. Must be in (1, 2).
        1 < power < 2: compound Poisson-Gamma.
    fit_intercept : bool, default=True
        Forwarded unchanged to ``GeneralizedLinearModel``.
    max_iter : int, default=100
        Forwarded unchanged to ``GeneralizedLinearModel``.
    tol : float, default=1e-4
        Forwarded unchanged to ``GeneralizedLinearModel``.
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
        Forwarded unchanged to ``GeneralizedLinearModel``.
    n_jobs : int or None, default=None
        Forwarded unchanged to ``GeneralizedLinearModel``.
    solver : str, default="auto"
        Forwarded unchanged to ``GeneralizedLinearModel``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``GeneralizedLinearModel``.

    Raises
    ------
    ValueError
        If ``power`` is not strictly between 1 and 2.
    """

    def __init__(
        self,
        power: float = 1.5,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: Optional[int] = None,
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # Chained comparison on purpose: NaN fails both halves and is rejected.
        inside_open_interval = 1.0 < power < 2.0
        if not inside_open_interval:
            raise ValueError(f"Tweedie power must be in (1, 2), got {power}")

        # Stored privately; read back by the family / loss hooks below.
        self._power = power

        glm_options = {
            "family": "tweedie",
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "C": C,
            "device": device,
            "n_jobs": n_jobs,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
        }
        super().__init__(**glm_options)

    def _get_family(self):
        """Return a ``Tweedie`` family built with the stored power."""
        stored_power = self._power
        return Tweedie(power=stored_power)

    def _get_loss_kwargs(self):
        """Return the loss keyword arguments: the stored power under ``"power"``."""
        loss_kwargs = {}
        loss_kwargs["power"] = self._power
        return loss_kwargs
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Classification metrics utilities."""
|
|
2
|
+
|
|
3
|
+
from ._classification import (
|
|
4
|
+
binary_average_precision_score,
|
|
5
|
+
binary_classification_table,
|
|
6
|
+
binary_confusion_matrix,
|
|
7
|
+
binary_precision_recall_curve,
|
|
8
|
+
binary_roc_auc_score,
|
|
9
|
+
binary_roc_curve,
|
|
10
|
+
evaluate_binary_classification,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"binary_confusion_matrix",
|
|
15
|
+
"binary_classification_table",
|
|
16
|
+
"binary_precision_recall_curve",
|
|
17
|
+
"binary_average_precision_score",
|
|
18
|
+
"binary_roc_curve",
|
|
19
|
+
"binary_roc_auc_score",
|
|
20
|
+
"evaluate_binary_classification",
|
|
21
|
+
]
|