statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,253 @@
1
+ """
2
+ LassoCV: Cross-validated Lasso regression with GPU support.
3
+
4
+ This module exports LassoCV which delegates to _select_lasso_alpha_cv
5
+ from _lasso.py for all CV logic (cache, fast-refit, backend-aware).
6
+ """
7
+
8
+ __all__ = ["LassoCV"]
9
+
10
+ from typing import Optional, Union
11
+
12
+ import numpy as np
13
+
14
+ from statgpu._config import Device
15
+ from statgpu.cross_validation._base import CVEstimatorBase
16
+ from statgpu.linear_model.wrappers._lasso import (
17
+ Lasso,
18
+ _normalize_lassocv_method,
19
+ _normalize_cd_kkt_check_every,
20
+ )
21
+
22
+
23
+ # Shared hash function from statgpu/cross_validation/_base.py
24
+ from statgpu.cross_validation._base import hash_cv_data as _hash_data
25
+
26
+
27
+ # =============================================================================
28
+ # LassoCV Class
29
+ # =============================================================================
30
+
31
class LassoCV(CVEstimatorBase):
    """
    Cross-validated Lasso regression with GPU support.

    This class implements K-fold cross-validation to select the optimal
    regularization parameter alpha for Lasso regression. The alpha search
    is delegated to ``_select_lasso_alpha_cv``; a final ``Lasso`` estimator
    is then fitted on the full data with the selected alpha.

    Parameters
    ----------
    alphas : array-like or None
        Alpha values to try. If None, generates n_alphas values.
    n_alphas : int
        Number of alpha values (if alphas is None). Default is 12.
    alpha_min_ratio : float
        Minimum alpha as a ratio of max alpha. Default is 1e-3.
    cv : int
        Number of CV folds. Default is 5.
    cv_splits : object or None
        Forwarded unchanged to the CV search as ``cv_splits``
        (presumably precomputed train/test splits -- confirm against
        ``_select_lasso_alpha_cv``). Default is None.
    fit_intercept : bool
        Whether to fit intercept. Default is True.
    device : str or Device
        Computation device: 'cpu', 'cuda', or 'auto'.
    n_jobs : int or None
        Forwarded to the base class and to the final Lasso estimator.
    compute_inference : bool
        Whether to compute standard errors, t-stats, p-values and CI.
        Default is False.
    max_iter : int
        Maximum iterations for Lasso solver. Default is 3000.
    tol : float
        Convergence tolerance. Default is 1e-4.
    stopping : str
        Stopping rule forwarded to the final Lasso estimator.
        Default is "coef_delta".
    solver : str
        Solver forwarded to the final Lasso estimator. Default is "fista".
    cpu_solver : str
        CPU solver used for the CV search and the final estimator.
        Forced to "coordinate_descent" when ``method`` is "glmnet".
        Default is "coordinate_descent".
    method : str
        CV method, normalized by ``_normalize_lassocv_method``.
        Default is "standard".
    cd_kkt_check_every : int or None
        Normalized by ``_normalize_cd_kkt_check_every``. When the stored
        value is None, ``fit`` uses 4 for the "glmnet" method and 1
        otherwise.
    inference_method : str
        Inference method forwarded to the final Lasso estimator.
        Default is "cpu_ols_inference".
    lipschitz_L : float or None
        Forwarded to the final Lasso estimator.
    admm_rho : float
        Forwarded to the final Lasso estimator. Default is 1.0.
    gpu_memory_cleanup : bool
        Forwarded to the final Lasso estimator. Default is False.
    random_state : int or None
        Random seed for CV splits.
    gpu_cv_mixed_precision : bool
        Whether to use mixed precision on GPU. Default is True.

    Attributes
    ----------
    alpha_ : float
        Selected alpha value.
    alphas_ : ndarray
        All alpha values tested.
    cv_results_ : dict
        CV results with keys ``"mse_path"`` and ``"mean_mse"``.
    mse_path_ : ndarray
        MSE path reported by the CV search (same array as
        ``cv_results_["mse_path"]``).
    mean_mse_ : ndarray
        Mean MSE reported by the CV search (same array as
        ``cv_results_["mean_mse"]``).
    best_score_ : float
        Negative of the minimum mean CV MSE (higher is better), or NaN
        when no mean MSE value is finite.
    coef_ : ndarray
        Coefficients of the final model.
    intercept_ : float
        Intercept of the final model.
    n_iter_ : object or None
        ``n_iter_`` of the final estimator, or None if it has none.
    estimator_ : Lasso
        The fitted Lasso estimator with selected alpha.

    Examples
    --------
    >>> import numpy as np
    >>> from statgpu.linear_model import LassoCV
    >>> X = np.random.randn(1000, 20)
    >>> y = X @ np.random.randn(20) + 0.1 * np.random.randn(1000)
    >>> model = LassoCV(cv=5, device='cuda')
    >>> model.fit(X, y)
    >>> print(f"Selected alpha: {model.alpha_:.4f}")
    >>> print(f"Best CV score (negative MSE): {model.best_score_:.4f}")
    """

    def __init__(
        self,
        alphas=None,
        n_alphas: int = 12,
        alpha_min_ratio: float = 1e-3,
        cv: int = 5,
        cv_splits=None,
        fit_intercept: bool = True,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        compute_inference: bool = False,
        max_iter: int = 3000,
        tol: float = 1e-4,
        stopping: str = "coef_delta",
        solver: str = "fista",
        cpu_solver: str = "coordinate_descent",
        method: str = "standard",
        cd_kkt_check_every: Optional[int] = None,
        inference_method: str = "cpu_ols_inference",
        lipschitz_L: Optional[float] = None,
        admm_rho: float = 1.0,
        gpu_memory_cleanup: bool = False,
        random_state: Optional[int] = None,
        gpu_cv_mixed_precision: bool = True,
    ):
        super().__init__(
            cv=cv,
            random_state=random_state,
            device=device,
            n_jobs=n_jobs,
        )

        # Hyper-parameters, coerced to their canonical types.
        self.alphas = alphas
        self.n_alphas = int(n_alphas)
        self.alpha_min_ratio = float(alpha_min_ratio)
        self.cv = int(cv)
        self.cv_splits = cv_splits
        self.fit_intercept = bool(fit_intercept)
        self.compute_inference = bool(compute_inference)
        self.max_iter = int(max_iter)
        self.tol = float(tol)
        self.stopping = str(stopping)
        self.solver = str(solver)
        self.cpu_solver = str(cpu_solver)
        self.method = _normalize_lassocv_method(method)
        self.cd_kkt_check_every = _normalize_cd_kkt_check_every(cd_kkt_check_every)
        self.inference_method = str(inference_method)
        self.lipschitz_L = lipschitz_L
        self.admm_rho = float(admm_rho)
        self.gpu_memory_cleanup = bool(gpu_memory_cleanup)
        self.gpu_cv_mixed_precision = bool(gpu_cv_mixed_precision)

        # Fitted state, populated by fit().
        self.alpha_ = None
        self.alphas_ = None
        self.cv_results_ = None
        self.mse_path_ = None
        self.mean_mse_ = None
        self.best_score_ = None
        self.coef_ = None
        self.intercept_ = None
        self.n_iter_ = None
        self.estimator_ = None

    def fit(self, X, y, sample_weight=None):
        """
        Fit Lasso regression with cross-validation to select alpha.

        Delegates to ``_select_lasso_alpha_cv`` for CV with cache, fast-refit,
        and backend-aware optimizations, then fits a final ``Lasso`` on the
        full data with the selected alpha.

        Parameters
        ----------
        X : array-like
            Training data (n_samples, n_features).
        y : array-like
            Target values.
        sample_weight : array-like or None
            Sample weights, forwarded to both the CV search and the final fit.

        Returns
        -------
        self : LassoCV
            Fitted estimator.
        """
        from statgpu.linear_model.wrappers._lasso import _select_lasso_alpha_cv, Lasso

        device_name = self._get_compute_device().value

        # The "glmnet" method always uses coordinate descent and a sparser
        # KKT check cadence unless the user set one explicitly.
        use_glmnet = str(self.method).lower() == "glmnet"
        effective_cpu_solver = (
            "coordinate_descent" if use_glmnet else str(self.cpu_solver)
        )
        effective_cd_kkt = self.cd_kkt_check_every
        if effective_cd_kkt is None:
            effective_cd_kkt = 4 if use_glmnet else 1

        details = _select_lasso_alpha_cv(
            X, y,
            alphas=self.alphas,
            n_alphas=self.n_alphas,
            alpha_min_ratio=self.alpha_min_ratio,
            cv_folds=self.cv,
            cv_splits=self.cv_splits,
            random_state=self.random_state,
            sample_weight=sample_weight,
            fit_intercept=self.fit_intercept,
            device=device_name,
            max_iter=self.max_iter,
            tol=self.tol,
            cpu_solver=effective_cpu_solver,
            method=self.method,
            cd_kkt_check_every=effective_cd_kkt,
            gpu_cv_mixed_precision=self.gpu_cv_mixed_precision,
            return_details=True,
        )

        # Store CV results
        self.alpha_ = float(details["alpha"])
        self.alphas_ = np.asarray(details["alphas"], dtype=np.float64)
        mse_path = np.asarray(details["mse_path"], dtype=np.float64)
        mean_mse = np.asarray(details["mean_mse"], dtype=np.float64)

        # Expose both arrays through cv_results_, as the class docstring
        # promises; the dedicated attributes reference the same arrays.
        self.cv_results_ = {"mse_path": mse_path, "mean_mse": mean_mse}
        self.mse_path_ = mse_path
        self.mean_mse_ = mean_mse

        # sklearn convention: best_score_ is negative MSE (higher is better).
        # Fall back to NaN when no alpha produced a finite mean MSE.
        if np.any(np.isfinite(mean_mse)):
            self.best_score_ = -float(np.nanmin(mean_mse))
        else:
            self.best_score_ = np.nan

        # Fit final model with selected alpha. The estimator is handed the
        # configured device setting (self.device), not the resolved name
        # used for the CV search above.
        estimator = Lasso(
            alpha=self.alpha_,
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            tol=self.tol,
            stopping=self.stopping,
            inference_method=self.inference_method,
            device=self.device,
            n_jobs=self.n_jobs,
            compute_inference=self.compute_inference,
            solver=self.solver,
            cpu_solver=effective_cpu_solver,
            lipschitz_L=self.lipschitz_L,
            admm_rho=self.admm_rho,
            gpu_memory_cleanup=self.gpu_memory_cleanup,
        )
        estimator.fit(X, y, sample_weight=sample_weight)

        self.estimator_ = estimator
        self.coef_ = np.asarray(estimator.coef_)
        self.intercept_ = estimator.intercept_
        self.n_iter_ = getattr(estimator, 'n_iter_', None)

        # Copy inference attributes if available (preserve underscore prefix)
        for attr in ('_bse', '_pvalues', '_tvalues', '_conf_int'):
            val = getattr(estimator, attr, None)
            if val is not None:
                setattr(self, attr, np.asarray(val))

        self._fitted = True
        return self

    def predict(self, X):
        """Predict using the fitted Lasso model.

        Checks that the model has been fitted, then delegates to
        ``self.estimator_.predict``.
        """
        self._check_is_fitted()
        return self.estimator_.predict(X)