statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Prediction mixin for PenalizedGeneralizedLinearModel."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from statgpu._config import Device
|
|
9
|
+
from statgpu.backends import _to_numpy
|
|
10
|
+
|
|
11
|
+
# Eta (linear predictor) clipping bound for numerical stability in GLM link functions.
# Prevents overflow in exp(eta) for log-link families and sigmoid(eta) for logistic.
# Value of 500 is safe because exp(500) ≈ 1.4e217, which is below the float64
# maximum (≈ 1.8e308). Within this module the bound is applied in the logistic
# branches of ``predict``.
_ETA_CLIP = 500.0

if TYPE_CHECKING:
    from ._base import PenalizedGeneralizedLinearModel as _Self


class _PenalizedPredictMixin:
    """Prediction and scoring methods for the penalized GLM estimator.

    The methods below read the following from the host instance: ``coef_``,
    ``intercept_``, ``loss``, ``device``, ``_design_info``,
    ``_formula_has_intercept``, ``_effective_intercept`` and (optionally)
    ``_selected_backend_name``; and call the helpers ``_to_array``,
    ``_cupy_available``, ``_torch_cuda_available``, ``_get_compute_device``
    and ``_family_for_loss``.
    """

    def _prepare_predict_X(self, X):
        """Apply stored formula design metadata to DataFrame inputs.

        When design metadata is stored and ``X`` is a pandas DataFrame, the
        frame is run through ``FormulaParser.transform`` and the ``Intercept``
        column is dropped if the formula had one. In every other case ``X`` is
        returned untouched.
        """
        design_info = self._design_info
        if design_info is None:
            # No formula: return X as-is to avoid unnecessary GPU→CPU→GPU round-trip
            return X

        try:
            import pandas as pd
        except ImportError:
            # Without pandas the input cannot be a DataFrame.
            return X
        if not isinstance(X, pd.DataFrame):
            return X

        from statgpu.core.formula import FormulaParser

        # Build a parser shell without running __init__ and attach the stored
        # design metadata so transform() reuses the fitted design.
        parser = FormulaParser.__new__(FormulaParser)
        parser._design_info = design_info
        parser.formula = None
        X = parser.transform(X)

        col_names = list(design_info.column_names)
        if self._formula_has_intercept and "Intercept" in col_names:
            # The intercept is added separately in predict(); drop its column.
            X = np.delete(X, col_names.index("Intercept"), axis=1)
        # Formula processing produces numpy arrays
        return np.asarray(X)

    def _prediction_backend_name(self):
        """Return the backend (``"cupy"``, ``"torch"`` or ``"numpy"``) for prediction.

        The backend recorded in ``_selected_backend_name`` wins when it is
        still usable. Otherwise ``Device.AUTO`` resolves to numpy and an
        explicitly requested GPU device must be available.

        Raises
        ------
        RuntimeError
            If the resolved compute device is CUDA or TORCH but the matching
            GPU backend is unavailable at prediction time.
        """
        backend_name = getattr(self, "_selected_backend_name", None)
        if backend_name == "cupy" and self._cupy_available():
            return "cupy"
        if backend_name == "torch" and self._torch_cuda_available():
            return "torch"
        if backend_name == "numpy":
            return "numpy"
        if self.device == Device.AUTO:
            return "numpy"

        device = self._get_compute_device()
        if device == Device.CUDA:
            if self._cupy_available():
                return "cupy"
            raise RuntimeError(
                "device='cuda' was explicitly requested, but CuPy/CUDA is unavailable at prediction time."
            )
        if device == Device.TORCH:
            if self._torch_cuda_available():
                return "torch"
            raise RuntimeError(
                "device='torch' was explicitly requested, but Torch CUDA is unavailable at prediction time."
            )
        return "numpy"

    def predict(self, X, return_cpu=True):
        """
        Predict using fitted model.

        For squared_error: returns linear prediction.
        For logistic: returns binary class labels (0.0 / 1.0 as floats).
        For any other loss: returns the inverse link of the linear
        prediction, as given by ``self._family_for_loss().link.inverse``
        (e.g. exp(linear prediction) for poisson).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.
        return_cpu : bool, default=True
            If True, always return a numpy ndarray (GPU→CPU transfer happens
            automatically when the model was fitted on GPU). If False, return
            the result in the same backend as the fitted coefficients (cupy/
            torch when fitted on GPU, numpy when fitted on CPU). Setting to
            False avoids an unnecessary D→H transfer when chaining GPU
            operations (e.g., ``model.predict(X_gpu) - y_gpu``).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.

        Raises
        ------
        RuntimeError
            If the model has not been fitted (``coef_`` is None), or if an
            explicitly requested GPU backend is unavailable.
        """
        if self.coef_ is None:
            raise RuntimeError("Model has not been fitted yet.")

        X = self._prepare_predict_X(X)
        backend_name = self._prediction_backend_name()
        if backend_name == "cupy":
            import cupy as cp
            Xb = cp.asarray(self._to_array(X, Device.CUDA))
            coef = cp.asarray(self.coef_)
            raw = Xb @ coef
            if self._effective_intercept:
                raw += cp.asarray(self.intercept_, dtype=raw.dtype)
            if self.loss == "logistic":
                p = 1.0 / (1.0 + cp.exp(-cp.clip(raw, -_ETA_CLIP, _ETA_CLIP)))
                result = (p > 0.5).astype(float)
            elif self.loss != "squared_error":
                result = self._family_for_loss().link.inverse(raw)
            else:
                result = raw
            return _to_numpy(result) if return_cpu else result
        if backend_name == "torch":
            import torch
            Xb = self._to_array(X, Device.TORCH, backend="torch").to(torch.float64)
            coef = torch.as_tensor(self.coef_, dtype=Xb.dtype, device=Xb.device)
            raw = Xb @ coef
            if self._effective_intercept:
                raw = raw + torch.as_tensor(
                    self.intercept_, dtype=raw.dtype, device=raw.device
                )
            if self.loss == "logistic":
                p = 1.0 / (1.0 + torch.exp(-torch.clamp(raw, -_ETA_CLIP, _ETA_CLIP)))
                result = (p > 0.5).to(raw.dtype)
            elif self.loss != "squared_error":
                result = self._family_for_loss().link.inverse(raw)
            else:
                result = raw
            return _to_numpy(result) if return_cpu else result

        # CPU path. ``X @ coef_`` yields a pandas Series when X is a DataFrame
        # that skipped formula processing; np.asarray normalises the product
        # to the documented ndarray return type.
        raw = np.asarray(X @ self.coef_)
        if self._effective_intercept:
            # Out-of-place add: the buffer behind a Series may be read-only,
            # and an in-place add cannot upcast an integer-typed product.
            raw = raw + self.intercept_

        # Apply link inverse for GLM losses
        if self.loss == "logistic":
            p = 1.0 / (1.0 + np.exp(-np.clip(raw, -_ETA_CLIP, _ETA_CLIP)))
            return (p > 0.5).astype(float)
        elif self.loss != "squared_error":
            return self._family_for_loss().link.inverse(raw)
        return raw

    def score(self, X, y, sample_weight=None):
        """
        Return goodness-of-fit score (R² = 1 - SS_res/SS_tot).

        For all loss types, computes the standard R² metric on the response
        scale. For squared_error this is the classical R². For GLM losses
        (logistic, Poisson, Gamma, etc.) this is R² on the original y scale,
        not the deviance-based pseudo-R².

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.
        y : array-like of shape (n_samples,)
            True values. A different shape with the same number of elements
            (e.g. a column vector) is flattened to match the predictions.
        sample_weight : array-like of shape (n_samples,), optional
            Sample weights. When provided, returns weighted R².

        Returns
        -------
        score : float
            R² score. ``0.0`` is returned when the total sum of squares is
            not positive or when the weights do not sum to a positive value.

        Raises
        ------
        ValueError
            If ``y`` and the predictions contain a different number of
            elements.
        """
        # Use predict(return_cpu=True) to avoid device mismatch between
        # predict() and score() backend resolution logic.
        y_pred_np = np.asarray(_to_numpy(self.predict(X, return_cpu=True)))
        y_true = np.asarray(y)

        if y_true.shape != y_pred_np.shape:
            # Without this, (n, 1) - (n,) silently broadcasts to an (n, n)
            # matrix and the sums below describe the wrong quantity.
            if y_true.size != y_pred_np.size:
                raise ValueError(
                    f"y has {y_true.size} elements but predictions have "
                    f"{y_pred_np.size}."
                )
            y_true = y_true.ravel()
            y_pred_np = y_pred_np.ravel()

        resid_sq = (y_true - y_pred_np) ** 2
        if sample_weight is None:
            ss_res = float(np.sum(resid_sq))
            ss_tot = float(np.sum((y_true - np.mean(y_true)) ** 2))
        else:
            sw = np.asarray(sample_weight, dtype=np.float64).ravel()
            w_sum = float(np.sum(sw))
            if w_sum <= 0:
                return 0.0
            ss_res = float(np.sum(sw * resid_sq))
            ss_tot = float(
                np.sum(sw * (y_true - np.average(y_true, weights=sw)) ** 2)
            )
        return 1 - ss_res / ss_tot if ss_tot > 0 else 0.0
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Basic model wrappers (thin wrappers over PenalizedGLM / GLM base)."""
|
|
2
|
+
|
|
3
|
+
from ._linear import LinearRegression
|
|
4
|
+
from ._ridge import Ridge
|
|
5
|
+
from ._lasso import Lasso
|
|
6
|
+
from ._elasticnet import ElasticNet
|
|
7
|
+
from ._adaptive_lasso import AdaptiveLasso
|
|
8
|
+
from ._scad import SCADRegression
|
|
9
|
+
from ._mcp import MCPRegression
|
|
10
|
+
from ._logistic import LogisticRegression
|
|
11
|
+
from ._gamma import GammaRegression
|
|
12
|
+
from ._poisson import PoissonRegression
|
|
13
|
+
from ._inverse_gaussian import InverseGaussianRegression
|
|
14
|
+
from ._negative_binomial import NegativeBinomialRegression
|
|
15
|
+
from ._tweedie import TweedieRegression
|
|
16
|
+
|
|
17
|
+
# Names re-exported by this package, listed in the same order as the
# imports at the top of the file.
__all__ = [
    "LinearRegression", "Ridge", "Lasso", "ElasticNet",
    "AdaptiveLasso", "SCADRegression", "MCPRegression",
    "LogisticRegression", "GammaRegression", "PoissonRegression",
    "InverseGaussianRegression", "NegativeBinomialRegression",
    "TweedieRegression",
]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Adaptive Lasso regression (Zou, JASA 2006)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Union
|
|
6
|
+
|
|
7
|
+
from statgpu._config import Device
|
|
8
|
+
from statgpu.linear_model.penalized._penalized_linear import PenalizedLinearRegression
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AdaptiveLasso(PenalizedLinearRegression):
    """Adaptive Lasso regression.

    Thin wrapper that configures ``PenalizedLinearRegression`` with
    ``penalty="adaptive_l1"``. The penalty uses data-driven per-coordinate
    weights, w_j = 1/(|init_coef_j| + eps)^nu, which gives the oracle
    property under regularity conditions (Zou 2006).

    Parameters
    ----------
    alpha : float, default=1.0
        Regularization strength.
    nu : float, default=1.0
        Exponent for weight computation. Stored as ``self.nu`` and also
        handed to the penalty through ``penalty_kwargs``.
    fit_intercept : bool, default=True
        Whether to calculate the intercept.
    max_iter : int, default=1000
        Maximum number of iterations.
    tol : float, default=1e-4
        Tolerance for convergence.
    device : str or Device, default='auto'
        Computation device.
    compute_inference : bool, default=False
        Whether to compute post-fit inference.
    inference_method : str, default='debiased'
        Inference method.
    solver : str, default='auto'
        Forwarded unchanged to ``PenalizedLinearRegression``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``PenalizedLinearRegression``.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        nu: float = 1.0,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # The exponent is recorded on the instance before the base
        # initializer runs.
        self.nu = nu

        # Everything the base class needs, collected in one place.
        base_options = {
            "penalty": "adaptive_l1",
            "alpha": alpha,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "penalty_kwargs": {"nu": nu},
        }
        super().__init__(**base_options)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Elastic Net regression with GPU support.
|
|
3
|
+
|
|
4
|
+
The V9 ElasticNet class is a thin wrapper over PenalizedLinearRegression
|
|
5
|
+
with penalty="elasticnet" and solver="exact".
|
|
6
|
+
|
|
7
|
+
The legacy standalone implementation has been moved to _elasticnet_legacy.py.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
__all__ = ["ElasticNet"]
|
|
13
|
+
|
|
14
|
+
from typing import Optional, Union
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from statgpu._config import Device
|
|
19
|
+
from statgpu.linear_model.penalized._penalized_linear import PenalizedLinearRegression as _PenalizedLinearRegression
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ElasticNet(_PenalizedLinearRegression):
    """Elastic Net estimator built on ``PenalizedLinearRegression``.

    The constructor fixes ``penalty="elasticnet"`` and forwards every other
    option to the base class. ``alpha`` must be non-negative.
    """

    def __init__(
        self,
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        stopping: str = "coef_delta",
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        solver: str = "fista",
        cpu_solver: str = "fista",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
    ):
        # Reject a negative regularization strength up front.
        if alpha < 0:
            raise ValueError(f"alpha must be non-negative, got {alpha}")

        # A lower-cased copy is stored here, while the base class receives
        # the caller's original value below.
        # NOTE(review): the base initializer may overwrite self.stopping with
        # the un-normalized value — confirm which one is intended to win.
        self.stopping = str(stopping).lower()

        base_options = {
            "penalty": "elasticnet",
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "solver": solver,
            "cpu_solver": cpu_solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "stopping": stopping,
        }
        super().__init__(**base_options)

    def fit(self, X=None, y=None, sample_weight=None, initial_coef=None, **kwargs):
        """Fit the Elastic Net model and return the result of the base ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like of shape (n_samples,), optional
            Sample weights.
        initial_coef : array-like of shape (n_features,), optional
            Warm-start coefficients. When given, they are converted to a
            float64 array and stored as ``self._init_coef`` before the base
            ``fit`` is called.
        **kwargs
            Extra keyword arguments passed through to the base ``fit``.
        """
        warm_start_given = initial_coef is not None
        if warm_start_given:
            # NOTE(review): _init_coef is only ever set here, never reset, so
            # a value from an earlier call persists — verify the base class
            # consumes or clears it.
            self._init_coef = np.asarray(initial_coef, dtype=np.float64)

        fitted = super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
        return fitted
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Gamma regression (GLM with Gamma family, log link)."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from statgpu._config import Device
|
|
6
|
+
from statgpu.glm_core._family import Gamma, LogLink, InversePowerLink
|
|
7
|
+
from statgpu.linear_model._glm_base import GeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Accepted ``link`` names and the link class each one selects.
_LINK_MAP = {
    "log": LogLink,
    "inverse_power": InversePowerLink,
}


class GammaRegression(GeneralizedLinearModel):
    """Gamma-family GLM for positive continuous outcomes.

    The log link is the default, chosen for numerical stability; the
    canonical inverse_power link can be selected instead.

    Parameters
    ----------
    fit_intercept : bool, default=True
    max_iter : int, default=100
    tol : float, default=1e-4
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
    n_jobs : int, optional
        Forwarded unchanged to ``GeneralizedLinearModel``.
    link : str, default='log'
        Link function: 'log' or 'inverse_power'. The name is checked when
        the family object is built, not in the constructor.
    solver : str, default='auto'
        Forwarded unchanged to ``GeneralizedLinearModel``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``GeneralizedLinearModel``.
    """

    def __init__(
        self,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: Optional[int] = None,
        link: str = "log",
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # Remember the requested link name; it is resolved in _get_family().
        self._link_name = link

        base_options = {
            "family": "gamma",
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "C": C,
            "device": device,
            "n_jobs": n_jobs,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
        }
        super().__init__(**base_options)

    def _get_family(self):
        """Build the ``Gamma`` family with the configured link.

        Raises
        ------
        ValueError
            If the stored link name is not a key of ``_LINK_MAP``.
        """
        link_cls = _LINK_MAP.get(self._link_name)
        if link_cls is None:
            valid = ", ".join(sorted(_LINK_MAP))
            raise ValueError(f"GammaRegression link must be one of: {valid}")
        return Gamma(link=link_cls())

    def _get_loss_kwargs(self):
        """Return the loss keyword arguments: the configured link name."""
        loss_kwargs = {"link": self._link_name}
        return loss_kwargs
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Inverse Gaussian regression (GLM, log link)."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from statgpu._config import Device
|
|
6
|
+
from statgpu.glm_core._family import InverseGaussian
|
|
7
|
+
from statgpu.linear_model._glm_base import GeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class InverseGaussianRegression(GeneralizedLinearModel):
    """Inverse Gaussian GLM for positive right-skewed outcomes.

    Configures ``GeneralizedLinearModel`` with ``family="inverse_gaussian"``
    and supplies an ``InverseGaussian`` family object.

    Parameters
    ----------
    fit_intercept : bool, default=True
    max_iter : int, default=100
    tol : float, default=1e-4
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
    n_jobs : int, optional
        Forwarded unchanged to ``GeneralizedLinearModel``.
    solver : str, default='auto'
        Forwarded unchanged to ``GeneralizedLinearModel``.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to ``GeneralizedLinearModel``.
    """

    def __init__(
        self,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: Optional[int] = None,
        solver: str = "auto",
        gpu_memory_cleanup: bool = False,
    ):
        # All options go straight to the base class; only the family is fixed.
        base_options = {
            "family": "inverse_gaussian",
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "C": C,
            "device": device,
            "n_jobs": n_jobs,
            "solver": solver,
            "gpu_memory_cleanup": gpu_memory_cleanup,
        }
        super().__init__(**base_options)

    def _get_family(self):
        """Return a default-constructed ``InverseGaussian`` family."""
        family = InverseGaussian()
        return family
|