statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,895 @@
1
+ """
2
+ LogisticRegressionCV: Cross-validated Logistic regression with GPU support.
3
+ """
4
+
5
+ __all__ = ["LogisticRegressionCV"]
6
+
7
+ from typing import Any, Dict, Optional, Tuple, Union
8
+ from collections import OrderedDict
9
+ import hashlib
10
+ import numpy as np
11
+
12
+ from statgpu._config import Device
13
+ from statgpu.cross_validation._base import CVEstimatorBase
14
+ from statgpu.backends import get_backend, _torch_dev
15
+ from statgpu.linear_model.wrappers._logistic import LogisticRegression
16
+
17
+
18
+ # =============================================================================
19
+ # CV Cache for LogisticRegression
20
+ # =============================================================================
21
+
22
+ import threading
23
+
24
+ _LOGISTIC_CV_C_CACHE_MAXSIZE = int(64)
25
+ _LOGISTIC_CV_C_CACHE: "OrderedDict[Tuple[Any, ...], Dict[str, Any]]" = OrderedDict()
26
+ _LOGISTIC_CV_CACHE_LOCK = threading.Lock()
27
+
28
+
29
+ def _logistic_cv_cache_get(key):
30
+ """Get cached LogisticRegression CV results."""
31
+ if key is None:
32
+ return None
33
+ with _LOGISTIC_CV_CACHE_LOCK:
34
+ val = _LOGISTIC_CV_C_CACHE.get(key)
35
+ if val is not None:
36
+ _LOGISTIC_CV_C_CACHE.move_to_end(key)
37
+ return val
38
+
39
+
40
+ def _logistic_cv_cache_put(key, value):
41
+ """Put cached LogisticRegression CV results."""
42
+ if key is None:
43
+ return
44
+ with _LOGISTIC_CV_CACHE_LOCK:
45
+ _LOGISTIC_CV_C_CACHE[key] = value
46
+ _LOGISTIC_CV_C_CACHE.move_to_end(key)
47
+ while len(_LOGISTIC_CV_C_CACHE) > _LOGISTIC_CV_C_CACHE_MAXSIZE:
48
+ _LOGISTIC_CV_C_CACHE.popitem(last=False)
49
+
50
+
51
+ from statgpu.cross_validation._base import hash_cv_data as _hash_logistic_data
52
+
53
+
54
def _make_logistic_cv_auto_cache_key(X, y, Cs, folds, fit_intercept, max_iter, tol, use_gpu, sample_weight=None):
    """Generate an automatic cache key for LogisticRegression CV.

    The key is a BLAKE2b hex digest over the shape and dtype of ``X``, the
    C grid, the solver settings, a content hash of the data (delegated to
    ``_hash_logistic_data``) and a sample of every fold's indices.

    Parameters
    ----------
    X, y : array-like
        Training data; ``X`` must expose ``shape`` and ``dtype``.
    Cs : array-like
        Candidate C values (hashed as float64).
    folds : iterable of (train_idx, val_idx)
        CV splits.
    fit_intercept, max_iter, tol, use_gpu
        Solver settings, hashed via their ``str()`` form.
    sample_weight : array-like or None
        Forwarded to ``_hash_logistic_data``.

    Returns
    -------
    str
        Hex digest identifying this CV configuration.
    """
    h = hashlib.blake2b(digest_size=32)

    def _feed(chunk):
        # Length-prefix every field so adjacent fields cannot run together:
        # without it, max_iter=1/tol=10.5 and max_iter=11/tol=0.5 would both
        # contribute the bytes "110.5".
        h.update(len(chunk).to_bytes(8, "little"))
        h.update(chunk)

    _feed(np.asarray(X.shape, dtype=np.int64).tobytes())
    _feed(str(X.dtype).encode("utf-8"))
    _feed(np.asarray(Cs, dtype=np.float64).tobytes())
    for setting in (fit_intercept, max_iter, tol, use_gpu):
        _feed(str(setting).encode("utf-8"))

    # Hash data content to avoid cross-dataset collisions.
    _feed(_hash_logistic_data(X, y, sample_weight))

    # Hash fold indices. Only a sample (first 5, last 5, and the length) of
    # each index array is hashed to keep this fast for large folds.
    fold_list = list(folds)
    _feed(np.int64(len(fold_list)).tobytes())
    for train_idx, val_idx in fold_list:
        for idx in (train_idx, val_idx):
            idx_arr = np.asarray(idx, dtype=np.int64)
            n_sample = min(5, len(idx_arr))
            _feed(idx_arr[:n_sample].tobytes())
            _feed(idx_arr[-n_sample:].tobytes())
            _feed(np.int64(len(idx_arr)).tobytes())
    return h.hexdigest()
80
+
81
+
82
+ # =============================================================================
83
+ # K-fold helper (reuse from RidgeCV)
84
+ # =============================================================================
85
+
86
+ from statgpu.cross_validation._base import kfold_indices as _kfold_indices, folds_are_complete as _folds_are_complete
87
+
88
+
89
+ # =============================================================================
90
+ # C grid generation (C = 1/alpha, so we use similar approach)
91
+ # =============================================================================
92
+
93
+ def _default_logistic_c_grid(X, y, n_Cs: int = 100, C_min_ratio: float = 1e-3):
94
+ """
95
+ Generate default C grid for LogisticRegressionCV.
96
+
97
+ C values are log-spaced. Larger C = weaker regularization.
98
+
99
+ Parameters
100
+ ----------
101
+ X : ndarray
102
+ Design matrix (n_samples, n_features).
103
+ y : ndarray
104
+ Response vector.
105
+ n_Cs : int
106
+ Number of C values to generate.
107
+ C_min_ratio : float
108
+ Minimum C as a ratio of max C.
109
+
110
+ Returns
111
+ -------
112
+ Cs : ndarray
113
+ Log-spaced C values.
114
+ """
115
+ X_arr = np.asarray(X, dtype=np.float64)
116
+ y_arr = np.asarray(y, dtype=np.float64).reshape(-1)
117
+
118
+ # Estimate C_max based on data
119
+ # For logistic regression, C_max is where coefficients become very large
120
+ # We use a heuristic based on the gradient at zero coefficients.
121
+ # Gradient of logistic loss at beta=0: X'(y - sigmoid(0)) = X'(y - 0.5)
122
+ grad = X_arr.T @ (y_arr - 0.5)
123
+ C_max = np.max(np.abs(grad)) * 2.0 / len(y_arr)
124
+
125
+ if C_max == 0:
126
+ C_max = 1.0
127
+
128
+ C_min = C_max * C_min_ratio
129
+
130
+ # Log-spaced grid
131
+ if n_Cs <= 1:
132
+ return np.array([C_max])
133
+
134
+ Cs = np.logspace(
135
+ np.log10(C_min),
136
+ np.log10(C_max),
137
+ num=n_Cs,
138
+ dtype=np.float64,
139
+ )
140
+ return Cs
141
+
142
+
143
+ # =============================================================================
144
+ # Batch log-loss computation
145
+ # =============================================================================
146
+
147
+ def _batch_log_loss(y_val, probs_desc, sample_weight=None):
148
+ """
149
+ Compute log-loss for multiple probability vectors efficiently.
150
+
151
+ Parameters
152
+ ----------
153
+ y_val : ndarray
154
+ Validation labels (n_samples,).
155
+ probs_desc : ndarray
156
+ Predicted probabilities (n_Cs, n_samples).
157
+ sample_weight : ndarray or None
158
+ Sample weights.
159
+
160
+ Returns
161
+ -------
162
+ log_loss : ndarray
163
+ Log-loss for each C (n_Cs,).
164
+ """
165
+ n_Cs = probs_desc.shape[0]
166
+ eps = 1e-15
167
+
168
+ # Clip probabilities
169
+ probs_clipped = np.clip(probs_desc, eps, 1 - eps)
170
+
171
+ # Log-loss: -mean(y * log(p) + (1-y) * log(1-p))
172
+ ll = -(y_val.reshape(1, -1) * np.log(probs_clipped) +
173
+ (1 - y_val.reshape(1, -1)) * np.log(1 - probs_clipped))
174
+
175
+ if sample_weight is not None:
176
+ sw = np.asarray(sample_weight).reshape(1, -1)
177
+ log_loss = np.sum(sw * ll, axis=1) / np.sum(sw)
178
+ else:
179
+ log_loss = np.mean(ll, axis=1)
180
+
181
+ return log_loss
182
+
183
+
184
+ def _batch_log_loss_backend(y_val, probs_desc, backend, sample_weight=None):
185
+ """Compute log-loss for multiple probability vectors (backend-aware).
186
+
187
+ Delegates to numpy version when backend is numpy, otherwise uses
188
+ backend methods for GPU arrays.
189
+ """
190
+ xp = getattr(backend, 'xp', np)
191
+ eps = 1e-15
192
+ probs_clipped = xp.clip(probs_desc, eps, 1 - eps) if hasattr(xp, 'clip') else np.clip(probs_desc, eps, 1 - eps)
193
+
194
+ ll = -(y_val.reshape(1, -1) * xp.log(probs_clipped) +
195
+ (1 - y_val.reshape(1, -1)) * xp.log(1 - probs_clipped))
196
+
197
+ if sample_weight is not None:
198
+ sw = sample_weight.reshape(1, -1)
199
+ log_loss = xp.sum(sw * ll, axis=1) / xp.sum(sw)
200
+ else:
201
+ log_loss = xp.mean(ll, axis=1)
202
+
203
+ return log_loss
204
+
205
+
206
+ # =============================================================================
207
+ # GPU batch solver for Logistic (IRLS)
208
+ # =============================================================================
209
+
210
+ def _solve_logistic_path_gpu_from_batch(X_batch, y_batch, n_train_vec, Cs, backend, fit_intercept=True, max_iter=100, tol=1e-4, sw_batch=None):
211
+ """
212
+ Solve logistic regression path for multiple folds using batched IRLS.
213
+
214
+ Parameters
215
+ ----------
216
+ X_batch : array-like
217
+ Batch of design matrices (n_folds, n_train_max, n_features).
218
+ y_batch : array-like
219
+ Batch of labels (n_folds, n_train_max).
220
+ n_train_vec : np.ndarray
221
+ Number of training samples for each fold.
222
+ Cs : ndarray
223
+ C values.
224
+ backend : BackendBase
225
+ Backend instance (CuPyBackend or TorchBackend).
226
+ fit_intercept : bool
227
+ Whether to fit intercept.
228
+ max_iter : int
229
+ Maximum iterations for IRLS.
230
+ tol : float
231
+ Convergence tolerance.
232
+ sw_batch : array-like, optional
233
+ Batch of sample weights (n_folds, n_train_max).
234
+
235
+ Returns
236
+ -------
237
+ coefs_desc : ndarray
238
+ Coefficients for each C and fold (n_Cs, n_folds, n_features).
239
+ intercepts_desc : ndarray
240
+ Intercepts for each C and fold (n_Cs, n_folds).
241
+ """
242
+ xp = backend.xp
243
+
244
+ n_folds = X_batch.shape[0]
245
+ n_Cs = len(Cs)
246
+
247
+ # Allocate outputs
248
+ all_coefs = []
249
+ all_intercepts = []
250
+
251
+ for fold_idx in range(n_folds):
252
+ X_fold = X_batch[fold_idx][:n_train_vec[fold_idx]]
253
+ y_fold = y_batch[fold_idx][:n_train_vec[fold_idx]]
254
+ sw_fold = sw_batch[fold_idx][:n_train_vec[fold_idx]] if sw_batch is not None else None
255
+ n_train = n_train_vec[fold_idx]
256
+
257
+ fold_coefs = []
258
+ fold_intercepts = []
259
+
260
+ for C in Cs:
261
+ # Initialize
262
+ if fit_intercept:
263
+ ones_col = backend.ones(n_train, dtype=X_fold.dtype)
264
+ if _torch_dev(X_fold) is not None:
265
+ if ones_col.ndim == 1:
266
+ ones_col = ones_col.unsqueeze(1)
267
+ X_design = xp.cat([ones_col, X_fold], dim=1)
268
+ else:
269
+ X_design = xp.column_stack([ones_col, X_fold])
270
+ params = backend.zeros(X_design.shape[1])
271
+ else:
272
+ X_design = X_fold
273
+ params = backend.zeros(X_fold.shape[1])
274
+
275
+ # sklearn convention: reg term = 1/(2C) * ||w||^2, Hessian contribution = 1/C * I
276
+ alpha = 1.0 / C if C > 0 else 0.0
277
+
278
+ # IRLS
279
+ xp = backend.xp
280
+ for iteration in range(max_iter):
281
+ params_old = backend.copy(params)
282
+
283
+ eta = X_design @ params
284
+ p = 1 / (1 + xp.exp(-xp.clip(eta, -500, 500)))
285
+
286
+ W = p * (1 - p)
287
+ W = xp.clip(W, 1e-8, 1 - 1e-8)
288
+
289
+ z = eta + (y_fold - p) / W
290
+
291
+ # Apply sample weights to W for weighted IRLS
292
+ if sw_fold is not None:
293
+ W = W * sw_fold
294
+
295
+ XtWX = X_design.T @ (X_design * W[:, None])
296
+
297
+ if alpha > 0:
298
+ reg_diag = backend.full(XtWX.shape[0], alpha)
299
+ if fit_intercept:
300
+ reg_diag = backend.asarray(reg_diag)
301
+ reg_diag[0] = 0.0
302
+ XtWX += backend.diag(reg_diag)
303
+
304
+ Xtz = X_design.T @ (W * z)
305
+
306
+ try:
307
+ params = backend.solve(XtWX, Xtz)
308
+ except Exception:
309
+ lstsq_result = backend.lstsq(XtWX, Xtz)
310
+ params = lstsq_result[0]
311
+
312
+ if backend.sqrt(backend.sum((params - params_old) ** 2)) < tol:
313
+ break
314
+
315
+ if fit_intercept:
316
+ fold_coefs.append(backend.to_numpy(params[1:]))
317
+ fold_intercepts.append(float(backend.to_numpy(params[0])))
318
+ else:
319
+ fold_coefs.append(backend.to_numpy(params))
320
+ fold_intercepts.append(0.0)
321
+
322
+ all_coefs.append(np.stack(fold_coefs, axis=0))
323
+ all_intercepts.append(np.array(fold_intercepts))
324
+
325
+ coefs_desc = np.stack(all_coefs, axis=1) # (n_Cs, n_folds, n_features)
326
+ intercepts_desc = np.stack(all_intercepts, axis=1) # (n_Cs, n_folds)
327
+
328
+ return coefs_desc, intercepts_desc
329
+
330
+
331
+ # =============================================================================
332
+ # Main CV selection function
333
+ # =============================================================================
334
+
335
def _gpu_logistic_c_grid(X, y, n_Cs, C_min_ratio):
    """Build the default log-spaced C grid from CuPy/Torch inputs.

    Uses the same heuristic as ``_default_logistic_c_grid``: the gradient
    of the logistic loss at zero coefficients, ``X'(y - 0.5)``. X and y are
    deliberately NOT centered - centering is incorrect for logistic
    regression. Like the CPU helper, ``n_Cs <= 1`` yields ``[C_max]``.
    """
    backend = get_backend(backend='auto', device='cuda')
    X_temp = backend.asarray(X)
    y_temp = backend.asarray(y)
    grad = X_temp.T @ (y_temp - 0.5)
    C_max = float(backend.max(backend.abs(grad)) * 2.0 / len(y_temp))
    if C_max == 0:
        C_max = 1.0
    if n_Cs <= 1:
        # np.logspace(..., num=1) would return C_min here, disagreeing with
        # the CPU helper, which returns C_max for a single-point grid.
        return np.array([C_max])
    C_min = C_max * C_min_ratio
    return np.logspace(np.log10(C_min), np.log10(C_max), num=n_Cs)


def _select_logistic_c_cv(
    X,
    y,
    *,
    Cs=None,
    n_Cs: int = 100,
    C_min_ratio: float = 1e-3,
    cv_folds: int = 5,
    cv_splits=None,
    random_state: Optional[int] = None,
    sample_weight=None,
    fit_intercept: bool = True,
    max_iter: int = 100,
    tol: float = 1e-4,
    device: Union[str, Device] = Device.CPU,
    return_details: bool = False,
    cache_key: Optional[Tuple[Any, ...]] = None,
    gpu_cv_mixed_precision: bool = True,
):
    """
    Select C for Logistic regression via K-fold cross-validation.

    For every fold and every candidate C a model is fitted on the training
    part and scored by (optionally weighted) log-loss on the validation
    part; the C with the smallest mean loss across folds wins.

    Parameters
    ----------
    X : array-like
        Design matrix (n_samples, n_features).
    y : array-like
        Binary response vector.
    Cs : array-like or None
        C values to try. Non-finite and non-positive entries are dropped.
        If None (or nothing survives the filter), generates n_Cs values.
    n_Cs : int
        Number of C values (if Cs is None).
    C_min_ratio : float
        Minimum C ratio.
    cv_folds : int
        Number of CV folds.
    cv_splits : list or None
        Pre-computed CV splits.
    random_state : int or None
        Random seed for CV splits.
    sample_weight : array-like or None
        Sample weights.
    fit_intercept : bool
        Whether to fit intercept.
    max_iter : int
        Maximum IRLS iterations.
    tol : float
        Convergence tolerance.
    device : str or Device
        Device to use ('cpu' or 'cuda').
    return_details : bool
        Whether to return full CV details.
    cache_key : tuple or None
        Cache key for CV results. Caching only happens when a key is given.
    gpu_cv_mixed_precision : bool
        Whether to use mixed precision on GPU (float32 instead of float64
        for the CV arrays).

    Returns
    -------
    float or dict
        The best C value, or - when ``return_details=True`` - a dict with
        keys ``"C"``, ``"Cs"``, ``"loss_path"`` and ``"mean_loss"``.

    Raises
    ------
    ValueError
        If X is not 2D, or y / sample_weight do not match X's row count
        (host-array inputs).
    RuntimeError
        If anything fails inside the GPU path; there is no CPU fallback.
    """
    device_name = str(device).lower()
    use_gpu = device_name in (Device.CUDA.value, Device.TORCH.value)

    # Detect inputs that are already CuPy arrays or Torch tensors. The
    # imports are optional dependencies, so a failure just means "not this
    # kind of input".
    gpu_input_cupy = False
    gpu_input_torch = False
    if use_gpu:
        try:
            import cupy as cp
            gpu_input_cupy = isinstance(X, cp.ndarray) and isinstance(y, cp.ndarray)
            if sample_weight is not None and not isinstance(sample_weight, cp.ndarray):
                gpu_input_cupy = False
        except Exception:
            pass

        if not gpu_input_cupy:
            try:
                import torch
                gpu_input_torch = isinstance(X, torch.Tensor) and isinstance(y, torch.Tensor)
                if sample_weight is not None and not isinstance(sample_weight, torch.Tensor):
                    gpu_input_torch = False
            except Exception:
                pass

    gpu_inputs = gpu_input_cupy or gpu_input_torch

    X_np = None
    y_np = None
    sample_weight_np = None

    if gpu_inputs:
        # Kept from the original flow so any backend-resolution failure
        # surfaces at the same point (presumably raises without a usable
        # CUDA backend - TODO confirm).
        get_backend(backend='auto', device='cuda')
        if len(tuple(X.shape)) != 2:
            raise ValueError("X must be a 2D array")
        n_samples = int(X.shape[0])
    else:
        X_np = np.asarray(X, dtype=np.float64)
        y_np = np.asarray(y, dtype=np.float64).reshape(-1)
        if sample_weight is not None:
            sample_weight_np = np.asarray(sample_weight, dtype=np.float64).reshape(-1)
        if X_np.ndim != 2:
            raise ValueError("X must be a 2D array")
        if y_np.shape[0] != X_np.shape[0]:
            raise ValueError("y must have the same number of rows as X")
        if sample_weight_np is not None and sample_weight_np.shape[0] != X_np.shape[0]:
            raise ValueError("sample_weight must have the same number of rows as X")
        n_samples = int(X_np.shape[0])

    # Generate C grid: user-supplied values first, default grid otherwise.
    C_grid = None
    if Cs is not None:
        C_grid = np.asarray(Cs, dtype=np.float64)
        C_grid = C_grid[np.isfinite(C_grid)]
        C_grid = C_grid[C_grid > 0.0]
    if C_grid is None or C_grid.size == 0:
        if gpu_inputs:
            C_grid = _gpu_logistic_c_grid(X, y, n_Cs, C_min_ratio)
        else:
            C_grid = _default_logistic_c_grid(X_np, y_np, n_Cs=n_Cs, C_min_ratio=C_min_ratio)

    # Handle degenerate cases: nothing to cross-validate, return the first C.
    if int(n_samples) < 4 or int(C_grid.size) == 1 or int(cv_folds) < 2:
        C0 = float(C_grid[0])
        if not return_details:
            return C0
        return {
            "C": C0,
            "Cs": C_grid.astype(np.float64, copy=False),
            "loss_path": np.full((int(C_grid.size), 1), np.nan, dtype=np.float64),
            "mean_loss": np.full(int(C_grid.size), np.nan, dtype=np.float64),
        }

    # Generate CV folds
    if cv_splits is not None:
        from statgpu.linear_model.wrappers._lasso import _normalize_cv_splits
        folds = _normalize_cv_splits(cv_splits, n_samples=int(n_samples))
    else:
        folds = _kfold_indices(n_samples=int(n_samples), n_splits=int(cv_folds), random_state=random_state)

    C_grid = C_grid.astype(np.float64, copy=False)
    n_C = int(C_grid.size)
    n_folds = int(len(folds))

    # Cache handling
    # Auto-cache disabled by default to prevent stale results across datasets.
    cache_key_eff = cache_key

    cached_details = _logistic_cv_cache_get(cache_key_eff)
    if cached_details is not None:
        if return_details:
            return cached_details
        return float(cached_details["C"])

    # Initialize loss path
    loss_path = np.full((n_C, n_folds), np.nan, dtype=np.float64)

    if use_gpu:
        # GPU path
        try:
            # Get backend - supports both CuPy and Torch
            backend = get_backend(backend='auto', device='cuda')
            xp = backend.xp

            cv_dtype = backend.float32 if bool(gpu_cv_mixed_precision) else backend.float64

            # Convert inputs to backend arrays
            if gpu_inputs:
                # Already CuPy/Torch arrays
                X_full = backend.asarray(X, dtype=cv_dtype)
                y_full = backend.asarray(y, dtype=cv_dtype).reshape(-1)
                if sample_weight is not None:
                    sw_full = backend.asarray(sample_weight, dtype=cv_dtype).reshape(-1)
                else:
                    sw_full = None
            else:
                # Convert from numpy
                X_full = backend.asarray(X_np, dtype=cv_dtype)
                y_full = backend.asarray(y_np, dtype=cv_dtype)
                if sample_weight_np is not None:
                    sw_full = backend.asarray(sample_weight_np, dtype=cv_dtype)
                else:
                    sw_full = None

            # Prepare batch data
            X_batch_list = []
            y_batch_list = []
            sw_batch_list = []
            n_train_folds = []
            fold_eval_payload = []

            for train_idx, val_idx in folds:
                train_idx_gpu = backend.asarray(train_idx)
                val_idx_gpu = backend.asarray(val_idx)

                X_train = X_full[train_idx_gpu]
                y_train = y_full[train_idx_gpu]
                X_val = X_full[val_idx_gpu]
                y_val = y_full[val_idx_gpu]
                sw_val = None if sw_full is None else sw_full[val_idx_gpu]
                sw_train = None if sw_full is None else sw_full[train_idx_gpu]

                X_batch_list.append(X_train)
                y_batch_list.append(y_train)
                sw_batch_list.append(sw_train)
                n_train_folds.append(int(X_train.shape[0]))
                fold_eval_payload.append((X_val, y_val, sw_val))

            # Pad batch to same size (folds may differ by a row or so)
            n_train_max = max(n_train_folds)
            n_features = X_full.shape[1]

            X_batch = backend.zeros((n_folds, n_train_max, n_features), dtype=cv_dtype)
            y_batch = backend.zeros((n_folds, n_train_max), dtype=cv_dtype)
            has_sw = sw_batch_list[0] is not None
            sw_batch = backend.zeros((n_folds, n_train_max), dtype=cv_dtype) if has_sw else None

            for fold_idx in range(n_folds):
                n_train = n_train_folds[fold_idx]
                X_batch[fold_idx, :n_train] = X_batch_list[fold_idx]
                y_batch[fold_idx, :n_train] = y_batch_list[fold_idx]
                if sw_batch is not None and sw_batch_list[fold_idx] is not None:
                    sw_batch[fold_idx, :n_train] = sw_batch_list[fold_idx]

            n_train_vec = np.asarray(n_train_folds, dtype=np.int32)

            # Solve for all Cs
            coefs_batch, intercepts_batch = _solve_logistic_path_gpu_from_batch(
                X_batch, y_batch, n_train_vec, C_grid, backend,
                fit_intercept=bool(fit_intercept), max_iter=max_iter, tol=tol,
                sw_batch=sw_batch
            )

            # Evaluate log-loss for each fold and C (vectorized across C)
            for fold_idx in range(n_folds):
                X_val, y_val, sw_val = fold_eval_payload[fold_idx]

                # coefs_batch shape: (n_C, n_folds, n_features)
                coefs_all = backend.asarray(coefs_batch[:, fold_idx, :])  # (n_C, n_features)
                intercepts_all = backend.asarray(intercepts_batch[:, fold_idx])  # (n_C,)

                # eta_all shape: (n_val, n_C)
                eta_all = X_val @ coefs_all.T + intercepts_all.reshape(1, -1)
                # probs_all shape: (n_C, n_val)
                probs_all = (1 / (1 + xp.exp(-xp.clip(eta_all, -500, 500)))).T

                loss_desc = _batch_log_loss_backend(y_val, probs_all, backend, sw_val)
                loss_path[:, fold_idx] = backend.to_numpy(loss_desc)

        except Exception as exc:
            raise RuntimeError(
                "GPU path failed in _select_logistic_c_cv with device='cuda'; "
                "CPU fallback is disabled for strict CUDA execution."
            ) from exc
    else:
        # CPU path
        eps = 1e-15
        for fold_idx, (train_idx, val_idx) in enumerate(folds):
            X_train = X_np[train_idx]
            y_train = y_np[train_idx]
            X_val = X_np[val_idx]
            y_val = y_np[val_idx]
            sw_val = None if sample_weight_np is None else sample_weight_np[val_idx]
            sw_train = None if sample_weight_np is None else sample_weight_np[train_idx]

            # Fit logistic regression for each C
            fold_losses = []
            for C in C_grid:
                model = LogisticRegression(
                    C=C,
                    fit_intercept=fit_intercept,
                    max_iter=max_iter,
                    tol=tol,
                    device='cpu',
                    compute_inference=False,
                )
                model.fit(X_train, y_train, sample_weight=sw_train)

                # Predict probabilities on validation set
                probs = model.predict_proba(X_val)[:, 1]

                # Compute log-loss on clipped probabilities
                probs_clipped = np.clip(probs, eps, 1 - eps)
                ll = -(y_val * np.log(probs_clipped) + (1 - y_val) * np.log(1 - probs_clipped))

                if sw_val is not None:
                    fold_losses.append(np.sum(sw_val * ll) / np.sum(sw_val))
                else:
                    fold_losses.append(np.mean(ll))

            loss_path[:, fold_idx] = fold_losses

    # Compute mean loss across folds
    mean_loss = np.nanmean(loss_path, axis=1)

    # Find best C (minimum loss)
    best_idx = int(np.nanargmin(mean_loss))
    best_C = float(C_grid[best_idx])

    details = {
        "C": best_C,
        "Cs": C_grid,
        "loss_path": loss_path,
        "mean_loss": mean_loss,
    }

    _logistic_cv_cache_put(cache_key_eff, details)

    if return_details:
        return details
    return best_C
682
+
683
+
684
+ # =============================================================================
685
+ # LogisticRegressionCV Class
686
+ # =============================================================================
687
+
688
+ class LogisticRegressionCV(CVEstimatorBase):
689
+ """
690
+ Cross-validated Logistic regression with GPU support.
691
+
692
+ This class implements K-fold cross-validation to select the optimal
693
+ regularization parameter C for Logistic regression.
694
+
695
+ Parameters
696
+ ----------
697
+ Cs : array-like or None
698
+ C values to try. If None, generates n_Cs values.
699
+ n_Cs : int
700
+ Number of C values (if Cs is None). Default is 100.
701
+ C_min_ratio : float
702
+ Minimum C as a ratio of max C.
703
+ cv : int
704
+ Number of CV folds. Default is 5.
705
+ fit_intercept : bool
706
+ Whether to fit intercepts. Default is True.
707
+ max_iter : int
708
+ Maximum number of IRLS iterations. Default is 100.
709
+ tol : float
710
+ Convergence tolerance. Default is 1e-4.
711
+ device : str or Device
712
+ Computation device: 'cpu', 'cuda', or 'auto'.
713
+ compute_inference : bool
714
+ Whether to compute standard errors, z-stats, p-values and CI.
715
+ cov_type : str
716
+ Covariance estimator for inference. One of:
717
+ 'nonrobust', 'hc0', 'hc1', 'hc2', 'hc3', 'hac'.
718
+ gpu_memory_cleanup : bool
719
+ Whether to free CuPy memory pool after fitting.
720
+ random_state : int or None
721
+ Random seed for CV splits.
722
+ gpu_cv_mixed_precision : bool
723
+ Whether to use mixed precision on GPU.
724
+
725
+ Attributes
726
+ ----------
727
+ C_ : float
728
+ Selected C value.
729
+ Cs_ : ndarray
730
+ All C values tested.
731
+ cv_results_ : dict
732
+ CV results including loss_path and mean_loss.
733
+ best_score_ : float
734
+ Negated minimum mean log-loss across CV folds (higher is better).
735
+ coef_ : ndarray
736
+ Coefficients of the final model.
737
+ intercept_ : float
738
+ Intercept of the final model.
739
+ estimator_ : LogisticRegression
740
+ The fitted LogisticRegression with selected C.
741
+
742
+ Examples
743
+ --------
744
+ >>> import numpy as np
745
+ >>> from statgpu.linear_model import LogisticRegressionCV
746
+ >>> X = np.random.randn(1000, 20)
747
+ >>> y = (X @ np.random.randn(20) > 0).astype(int)
748
+ >>> model = LogisticRegressionCV(cv=5, device='cuda')
749
+ >>> model.fit(X, y)
750
+ >>> print(f"Selected C: {model.C_:.4f}")
751
+ >>> print(f"Best CV score: {model.best_score_:.4f}")
752
+ """
753
+
754
def __init__(
    self,
    Cs=None,
    n_Cs: int = 100,
    C_min_ratio: float = 1e-3,
    cv: int = 5,
    cv_splits=None,
    fit_intercept: bool = True,
    max_iter: int = 100,
    tol: float = 1e-4,
    device: Union[str, Device] = Device.AUTO,
    n_jobs: Optional[int] = None,
    compute_inference: bool = True,
    cov_type: str = "nonrobust",
    gpu_memory_cleanup: bool = False,
    random_state: Optional[int] = None,
    gpu_cv_mixed_precision: bool = True,
):
    """
    Record the hyper-parameters and clear every fitted-state attribute.

    ``cv``, ``random_state``, ``device`` and ``n_jobs`` are handed to the
    base-class constructor; the remaining arguments are stored on the
    instance. Scalar options are coerced to the matching builtin type
    (``int``, ``float``, ``bool`` or ``str``) at construction time, so a
    value that cannot be converted raises here rather than inside ``fit``.

    Parameters
    ----------
    Cs : array-like or None
        Candidate C values, stored as given.
    n_Cs : int
        Number of C values to generate when ``Cs`` is None.
    C_min_ratio : float
        Minimum C as a ratio of max C.
    cv : int
        Number of CV folds.
    cv_splits : object or None
        Stored as given and passed to the CV search as ``cv_splits`` by
        ``fit``. Presumably precomputed train/validation splits - confirm
        against ``_select_logistic_c_cv``.
    fit_intercept : bool
        Whether to fit an intercept.
    max_iter : int
        Maximum number of solver iterations.
    tol : float
        Convergence tolerance.
    device : str or Device
        Computation device.
    n_jobs : int or None
        Forwarded to the base class; ``fit`` also forwards it to the final
        ``LogisticRegression``.
    compute_inference : bool
        Whether the final model computes inference statistics.
    cov_type : str
        Covariance estimator name used for inference.
    gpu_memory_cleanup : bool
        Forwarded to the final ``LogisticRegression``.
    random_state : int or None
        Random seed for CV splits.
    gpu_cv_mixed_precision : bool
        Forwarded to the CV search.
    """
    super().__init__(cv=cv, random_state=random_state, device=device, n_jobs=n_jobs)

    # Hyper-parameters. `cv` is deliberately re-assigned after the base
    # constructor has run, so the instance always ends up with int(cv).
    self.Cs = Cs
    self.n_Cs = int(n_Cs)
    self.C_min_ratio = float(C_min_ratio)
    self.cv = int(cv)
    self.cv_splits = cv_splits
    self.fit_intercept = bool(fit_intercept)
    self.max_iter = int(max_iter)
    self.tol = float(tol)
    self.compute_inference = bool(compute_inference)
    self.cov_type = str(cov_type)
    self.gpu_memory_cleanup = bool(gpu_memory_cleanup)
    self.gpu_cv_mixed_precision = bool(gpu_cv_mixed_precision)

    # Fitted state: everything stays None until fit() fills it in.
    self.C_ = self.Cs_ = self.cv_results_ = None
    self.mean_loss_ = self.best_score_ = None
    self.coef_ = self.intercept_ = self.n_iter_ = self.estimator_ = None
800
+
801
def fit(self, X, y, sample_weight=None):
    """
    Select C by cross-validation, then refit on the full data.

    Parameters
    ----------
    X : array-like
        Training data (n_samples, n_features).
    y : array-like
        Target values; every entry must equal 0 or 1.
    sample_weight : array-like or None
        Sample weights, forwarded to both the CV search and the final fit.

    Returns
    -------
    self : LogisticRegressionCV
        Fitted estimator.

    Raises
    ------
    ValueError
        If ``y`` contains any value other than 0 or 1.

    Notes
    -----
    Sets ``C_``, ``Cs_``, ``cv_results_`` (keys ``"loss_path"`` and
    ``"mean_loss"``), ``mean_loss_``, ``best_score_``, ``estimator_``,
    ``coef_``, ``intercept_`` and ``n_iter_``. ``best_score_`` is the
    negated minimum of the mean CV loss, or NaN when no mean loss is
    finite. Exceptions raised by the CV search or by the final
    ``LogisticRegression.fit`` propagate unchanged.
    """
    # Validate on a flattened float view; the caller's `y` itself is what
    # gets forwarded to the CV search and to the final estimator.
    labels = np.unique(np.asarray(y, dtype=np.float64).ravel())
    if not np.all(np.isin(labels, [0.0, 1.0])):
        raise ValueError(
            f"LogisticRegressionCV requires binary y (0 or 1), "
            f"got unique values: {labels[:10]}"
        )

    # The CV search takes the resolved device name, while the final
    # estimator below receives the unresolved `self.device` setting.
    device_name = self._get_compute_device().value

    # Run CV to select C.
    details = _select_logistic_c_cv(
        X,
        y,
        Cs=self.Cs,
        n_Cs=self.n_Cs,
        C_min_ratio=self.C_min_ratio,
        cv_folds=self.cv,
        cv_splits=self.cv_splits,
        random_state=self.random_state,
        sample_weight=sample_weight,
        fit_intercept=self.fit_intercept,
        max_iter=self.max_iter,
        tol=self.tol,
        device=device_name,
        gpu_cv_mixed_precision=self.gpu_cv_mixed_precision,
        return_details=True,
    )

    # `details` is the same dict the CV routine hands to its result cache,
    # so take copies (np.array copies by default; np.asarray would alias a
    # float64 input). Otherwise an in-place edit of these public attributes
    # could modify the cached arrays as well.
    self.C_ = float(details["C"])
    self.Cs_ = np.array(details["Cs"], dtype=np.float64)
    loss_path = np.array(details["loss_path"], dtype=np.float64)
    mean_loss = np.array(details["mean_loss"], dtype=np.float64)

    # Expose the mean loss alongside the per-fold path, as the class
    # docstring describes for cv_results_.
    self.cv_results_ = {"loss_path": loss_path, "mean_loss": mean_loss}
    self.mean_loss_ = mean_loss

    if np.any(np.isfinite(mean_loss)):
        # sklearn convention: best_score_ is negative loss (higher is better)
        self.best_score_ = -float(np.nanmin(mean_loss))
    else:
        self.best_score_ = np.nan

    # Fit final model with selected C on the full data.
    estimator = LogisticRegression(
        C=self.C_,
        fit_intercept=self.fit_intercept,
        max_iter=self.max_iter,
        tol=self.tol,
        device=self.device,
        n_jobs=self.n_jobs,
        compute_inference=self.compute_inference,
        cov_type=self.cov_type,
        gpu_memory_cleanup=self.gpu_memory_cleanup,
    )
    estimator.fit(X, y, sample_weight=sample_weight)

    self.estimator_ = estimator
    self.coef_ = np.asarray(estimator.coef_)
    self.intercept_ = estimator.intercept_
    # Not every estimator is assumed to expose n_iter_; fall back to None.
    self.n_iter_ = getattr(estimator, 'n_iter_', None)

    self._fitted = True
    return self
886
+
887
def predict(self, X):
    """
    Predict class labels for ``X`` with the final fitted model.

    Parameters
    ----------
    X : array-like
        Samples to classify; passed unchanged to ``estimator_.predict``.

    Returns
    -------
    Whatever ``estimator_.predict`` returns for ``X``.
    """
    # Guard first: presumably raises when fit() has not been called yet
    # (the check itself is defined outside this block).
    self._check_is_fitted()
    fitted_model = self.estimator_
    labels = fitted_model.predict(X)
    return labels
891
+
892
def predict_proba(self, X):
    """
    Predict class probabilities for ``X`` with the final fitted model.

    Parameters
    ----------
    X : array-like
        Samples to score; passed unchanged to ``estimator_.predict_proba``.

    Returns
    -------
    Whatever ``estimator_.predict_proba`` returns for ``X``. The CV code in
    this file reads column 1 of that result as the positive-class
    probability.
    """
    # Guard first: presumably raises when fit() has not been called yet
    # (the check itself is defined outside this block).
    self._check_is_fitted()
    fitted_model = self.estimator_
    probabilities = fitted_model.predict_proba(X)
    return probabilities