statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LassoCV: Cross-validated Lasso regression with GPU support.
|
|
3
|
+
|
|
4
|
+
This module exports LassoCV which delegates to _select_lasso_alpha_cv
|
|
5
|
+
from _lasso.py for all CV logic (cache, fast-refit, backend-aware).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__all__ = ["LassoCV"]
|
|
9
|
+
|
|
10
|
+
from typing import Optional, Union
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from statgpu._config import Device
|
|
15
|
+
from statgpu.cross_validation._base import CVEstimatorBase
|
|
16
|
+
from statgpu.linear_model.wrappers._lasso import (
|
|
17
|
+
Lasso,
|
|
18
|
+
_normalize_lassocv_method,
|
|
19
|
+
_normalize_cd_kkt_check_every,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Shared hash function from statgpu/cross_validation/_base.py
|
|
24
|
+
from statgpu.cross_validation._base import hash_cv_data as _hash_data
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# =============================================================================
|
|
28
|
+
# LassoCV Class
|
|
29
|
+
# =============================================================================
|
|
30
|
+
|
|
31
|
+
class LassoCV(CVEstimatorBase):
    """
    Cross-validated Lasso regression with GPU support.

    This class implements K-fold cross-validation to select the optimal
    regularization parameter alpha for Lasso regression. The CV search itself
    is delegated to ``_select_lasso_alpha_cv``; a final ``Lasso`` estimator is
    then refit on the full data with the selected alpha.

    Parameters
    ----------
    alphas : array-like or None
        Alpha values to try. If None, generates n_alphas values.
    n_alphas : int
        Number of alpha values (if alphas is None). Default is 12.
    alpha_min_ratio : float
        Minimum alpha as a ratio of max alpha. Default is 1e-3.
    cv : int
        Number of CV folds. Default is 5.
    cv_splits : object or None
        Forwarded unchanged to the CV routine as ``cv_splits``.
        Presumably explicit, pre-computed train/test splits -- confirm
        against ``_select_lasso_alpha_cv``.
    fit_intercept : bool
        Whether to fit intercept. Default is True.
    device : str or Device
        Computation device: 'cpu', 'cuda', or 'auto'.
    n_jobs : int or None
        Forwarded to the base class and to the final Lasso estimator.
    compute_inference : bool
        Whether to compute standard errors, t-stats, p-values and CI.
        Default is False.
    max_iter : int
        Maximum iterations for Lasso solver. Default is 3000.
    tol : float
        Convergence tolerance. Default is 1e-4.
    stopping : str
        Stopping rule forwarded to the final Lasso estimator.
        Default is "coef_delta".
    solver : str
        Solver forwarded to the final Lasso estimator. Default is "fista".
    cpu_solver : str
        CPU solver used for both the CV search and the final fit.
        Default is "coordinate_descent". Ignored (forced to
        "coordinate_descent") when ``method`` is "glmnet".
    method : str
        CV method, normalized by ``_normalize_lassocv_method``.
        Default is "standard". "glmnet" forces the coordinate-descent CPU
        solver and changes the default of ``cd_kkt_check_every``.
    cd_kkt_check_every : int or None
        Forwarded to the CV routine. If None, resolves at fit time to 4 when
        ``method`` is "glmnet" and 1 otherwise.
    inference_method : str
        Inference method forwarded to the final Lasso estimator.
        Default is "cpu_ols_inference".
    lipschitz_L : float or None
        Forwarded to the final Lasso estimator.
    admm_rho : float
        Forwarded to the final Lasso estimator. Default is 1.0.
    gpu_memory_cleanup : bool
        Forwarded to the final Lasso estimator. Default is False.
    random_state : int or None
        Random seed for CV splits.
    gpu_cv_mixed_precision : bool
        Whether to use mixed precision on GPU. Default is True.

    Attributes
    ----------
    alpha_ : float
        Selected alpha value.
    alphas_ : ndarray
        All alpha values tested.
    cv_results_ : dict
        CV results with keys ``"mse_path"`` and ``"mean_mse"``.
    mse_path_ : ndarray
        MSE path reported by the CV routine (same array as
        ``cv_results_["mse_path"]``).
    mean_mse_ : ndarray
        Mean MSE reported by the CV routine (same array as
        ``cv_results_["mean_mse"]``).
    best_score_ : float
        Negated minimum of ``mean_mse_`` (higher is better, following the
        scikit-learn scoring convention), or NaN when no mean MSE is finite.
    coef_ : ndarray
        Coefficients of the final model.
    intercept_ : float
        Intercept of the final model.
    n_iter_ : object or None
        ``n_iter_`` of the final Lasso estimator, if it exposes one.
    estimator_ : Lasso
        The fitted Lasso estimator with selected alpha.

    Examples
    --------
    >>> import numpy as np
    >>> from statgpu.linear_model import LassoCV
    >>> X = np.random.randn(1000, 20)
    >>> y = X @ np.random.randn(20) + 0.1 * np.random.randn(1000)
    >>> model = LassoCV(cv=5, device='cuda')
    >>> model.fit(X, y)
    >>> print(f"Selected alpha: {model.alpha_:.4f}")
    >>> print(f"Best CV score: {model.best_score_:.4f}")
    """

    def __init__(
        self,
        alphas=None,
        n_alphas: int = 12,
        alpha_min_ratio: float = 1e-3,
        cv: int = 5,
        cv_splits=None,
        fit_intercept: bool = True,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        compute_inference: bool = False,
        max_iter: int = 3000,
        tol: float = 1e-4,
        stopping: str = "coef_delta",
        solver: str = "fista",
        cpu_solver: str = "coordinate_descent",
        method: str = "standard",
        cd_kkt_check_every: Optional[int] = None,
        inference_method: str = "cpu_ols_inference",
        lipschitz_L: Optional[float] = None,
        admm_rho: float = 1.0,
        gpu_memory_cleanup: bool = False,
        random_state: Optional[int] = None,
        gpu_cv_mixed_precision: bool = True,
    ):
        super().__init__(
            cv=cv,
            random_state=random_state,
            device=device,
            n_jobs=n_jobs,
        )

        # Hyper-parameters (coerced to plain Python types up front).
        self.alphas = alphas
        self.n_alphas = int(n_alphas)
        self.alpha_min_ratio = float(alpha_min_ratio)
        self.cv = int(cv)
        self.cv_splits = cv_splits
        self.fit_intercept = bool(fit_intercept)
        self.compute_inference = bool(compute_inference)
        self.max_iter = int(max_iter)
        self.tol = float(tol)
        self.stopping = str(stopping)
        self.solver = str(solver)
        self.cpu_solver = str(cpu_solver)
        self.method = _normalize_lassocv_method(method)
        self.cd_kkt_check_every = _normalize_cd_kkt_check_every(cd_kkt_check_every)
        self.inference_method = str(inference_method)
        self.lipschitz_L = lipschitz_L
        self.admm_rho = float(admm_rho)
        self.gpu_memory_cleanup = bool(gpu_memory_cleanup)
        self.gpu_cv_mixed_precision = bool(gpu_cv_mixed_precision)

        # Fitted state; populated by fit().
        self.alpha_ = None
        self.alphas_ = None
        self.cv_results_ = None
        self.mse_path_ = None
        self.mean_mse_ = None
        self.best_score_ = None
        self.coef_ = None
        self.intercept_ = None
        self.n_iter_ = None
        self.estimator_ = None

    def fit(self, X, y, sample_weight=None):
        """
        Fit Lasso regression with cross-validation to select alpha.

        Delegates to ``_select_lasso_alpha_cv`` for CV with cache, fast-refit,
        and backend-aware optimizations, then refits a ``Lasso`` estimator on
        the full data with the selected alpha.

        Parameters
        ----------
        X : array-like
            Training data (n_samples, n_features).
        y : array-like
            Target values.
        sample_weight : array-like or None
            Sample weights. Passed to both the CV routine and the final fit.

        Returns
        -------
        self : LassoCV
            Fitted estimator.
        """
        from statgpu.linear_model.wrappers._lasso import _select_lasso_alpha_cv, Lasso

        device_name = self._get_compute_device().value

        # The "glmnet" method always runs coordinate descent on CPU and uses a
        # sparser default KKT check interval than the other methods.
        is_glmnet = str(self.method).lower() == "glmnet"
        effective_cpu_solver = "coordinate_descent" if is_glmnet else str(self.cpu_solver)
        effective_cd_kkt = self.cd_kkt_check_every
        if effective_cd_kkt is None:
            effective_cd_kkt = 4 if is_glmnet else 1

        details = _select_lasso_alpha_cv(
            X, y,
            alphas=self.alphas,
            n_alphas=self.n_alphas,
            alpha_min_ratio=self.alpha_min_ratio,
            cv_folds=self.cv,
            cv_splits=self.cv_splits,
            random_state=self.random_state,
            sample_weight=sample_weight,
            fit_intercept=self.fit_intercept,
            device=device_name,
            max_iter=self.max_iter,
            tol=self.tol,
            cpu_solver=effective_cpu_solver,
            method=self.method,
            cd_kkt_check_every=effective_cd_kkt,
            gpu_cv_mixed_precision=self.gpu_cv_mixed_precision,
            return_details=True,
        )

        # Store CV results
        self.alpha_ = float(details["alpha"])
        self.alphas_ = np.asarray(details["alphas"], dtype=np.float64)
        mse_path = np.asarray(details["mse_path"], dtype=np.float64)
        mean_mse = np.asarray(details["mean_mse"], dtype=np.float64)

        # Expose both arrays in cv_results_, as the class docstring promises.
        self.cv_results_ = {"mse_path": mse_path, "mean_mse": mean_mse}
        self.mse_path_ = mse_path
        self.mean_mse_ = mean_mse
        # sklearn convention: best_score_ is negative MSE (higher is better)
        self.best_score_ = -float(np.nanmin(mean_mse)) if np.any(np.isfinite(mean_mse)) else np.nan

        # Fit final model with selected alpha
        estimator = Lasso(
            alpha=self.alpha_,
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            tol=self.tol,
            stopping=self.stopping,
            inference_method=self.inference_method,
            device=self.device,
            n_jobs=self.n_jobs,
            compute_inference=self.compute_inference,
            solver=self.solver,
            cpu_solver=effective_cpu_solver,
            lipschitz_L=self.lipschitz_L,
            admm_rho=self.admm_rho,
            gpu_memory_cleanup=self.gpu_memory_cleanup,
        )
        estimator.fit(X, y, sample_weight=sample_weight)

        self.estimator_ = estimator
        self.coef_ = np.asarray(estimator.coef_)
        self.intercept_ = estimator.intercept_
        self.n_iter_ = getattr(estimator, 'n_iter_', None)

        # Copy inference attributes if available (preserve underscore prefix)
        for attr in ('_bse', '_pvalues', '_tvalues', '_conf_int'):
            val = getattr(estimator, attr, None)
            if val is not None:
                setattr(self, attr, np.asarray(val))

        self._fitted = True
        return self

    def predict(self, X):
        """Predict using the fitted Lasso model."""
        self._check_is_fitted()
        return self.estimator_.predict(X)
|