statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,748 @@
1
+ """Kernel regression with NumPy/CuPy backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Optional, Union
6
+
7
+ import numpy as np
8
+
9
+ from statgpu._base import BaseEstimator
10
+ from statgpu.backends import (
11
+ _torch_dev,
12
+ xp_asarray,
13
+ xp_empty,
14
+ xp_eye,
15
+ xp_full,
16
+ xp_maximum,
17
+ xp_ones,
18
+ )
19
+ from statgpu.nonparametric.kernel_smoothing._bandwidth_selection import select_bandwidth
20
+
21
+ from statgpu.nonparametric.kernel_smoothing._kernel_common import (
22
+ _auto_backend_from_device,
23
+ _as_points_2d,
24
+ _as_samples_2d,
25
+ _effective_sample_size,
26
+ _get_xp,
27
+ _kernel_values_from_quad,
28
+ _normalize_kernel_name,
29
+ _normalize_regression_name,
30
+ _normalize_weights,
31
+ _stable_inv_and_det,
32
+ _to_float_scalar,
33
+ _to_numpy,
34
+ _weighted_covariance,
35
+ )
36
+
37
+
38
+ class KernelRegression(BaseEstimator):
39
+ """sklearn-style kernel regression model (Nadaraya-Watson or local-linear)."""
40
+
41
def __init__(
    self,
    *,
    bandwidth: Union[str, float, int] = "scott",
    weights=None,
    kernel: str = "gaussian",
    regression: str = "nw",
    kernel_metric: str = "full",
    bandwidth_per_feature=None,
    backend: str = "auto",
    device: str = "auto",
    n_jobs: Optional[int] = None,
    batch_size: int = 1024,
    min_effective_weight: float = 1e-12,
    gpu_memory_cleanup: bool = False,
):
    """Record the estimator configuration; nothing is validated or computed here.

    Parameters
    ----------
    bandwidth : str or float or int
        A selector name (handed to ``select_bandwidth`` during ``fit``) or a
        numeric factor; ``fit`` scales the covariance by ``factor**2``.
    weights : array-like or None
        Per-sample weights, normalised in ``fit``.
    kernel : str
        Kernel name, normalised in ``fit``.
    regression : str
        Regression flavour, normalised in ``fit``.
    kernel_metric : str
        ``'full'`` or ``'diagonal'`` (aliases are accepted in ``fit``).
    bandwidth_per_feature : array-like or None
        Explicit per-feature bandwidths; ``fit`` requires the diagonal metric
        when this is given.
    backend : str
        Array backend name; ``"auto"`` derives it from the compute device.
    device, n_jobs
        Forwarded unchanged to the base estimator constructor.
    batch_size : int
        Default number of query points evaluated per batch (coerced to int).
    min_effective_weight : float
        Default kernel-mass threshold below which predictions fall back to
        the weighted target mean (coerced to float).
    gpu_memory_cleanup : bool
        When true, cached GPU memory is released after ``predict`` and when
        the instance is garbage collected.
    """
    super().__init__(device=device, n_jobs=n_jobs)

    # Kernel / bandwidth settings, stored verbatim and interpreted in fit().
    self.bandwidth = bandwidth
    self.weights = weights
    self.kernel = kernel
    self.regression = regression
    self.kernel_metric = kernel_metric
    self.bandwidth_per_feature = bandwidth_per_feature

    # Execution settings.
    self.backend = backend
    self.batch_size = int(batch_size)
    self.min_effective_weight = float(min_effective_weight)
    self.gpu_memory_cleanup = gpu_memory_cleanup
68
+
69
+ def _resolve_backend_name(self, X, y) -> str:
70
+ backend_name = str(self.backend).strip().lower()
71
+ if backend_name != "auto":
72
+ return backend_name
73
+ return _auto_backend_from_device(self._get_compute_device().value)
74
+
75
def fit(self, X, y):
    """Fit kernel regression and cache model state on this instance.

    The method converts ``X``/``y``/weights to backend arrays, derives the
    kernel covariance (either ``factor**2`` times the weighted sample
    covariance, or ``diag(bandwidth_per_feature**2)``), inverts it, and
    stores everything prediction needs as trailing-underscore attributes.

    Raises
    ------
    ValueError
        If a cosine-type kernel is used with more than one feature, if
        ``bandwidth_per_feature`` is combined with a non-diagonal metric, or
        if the resolved bandwidth factor is not a finite positive number.

    Returns
    -------
    self
    """
    backend_name = self._resolve_backend_name(X, y)
    xp = _get_xp(backend_name)

    # ---- inputs ------------------------------------------------------
    samples = _as_samples_2d(X, xp)
    n_samples = int(samples.shape[0])
    n_features = int(samples.shape[1])

    targets, target_was_1d = _as_targets_2d(y, n_samples, xp, ref_arr=samples)
    n_targets = int(targets.shape[1])

    weights = _normalize_weights(self.weights, n_samples, xp, ref_arr=samples)
    n_eff = _effective_sample_size(weights, xp)

    # ---- option normalisation ----------------------------------------
    kernel_name = _normalize_kernel_name(self.kernel)
    regression_name = _normalize_regression_name(self.regression)
    metric_name = _normalize_kernel_metric_name(self.kernel_metric)
    if n_features != 1 and kernel_name in ("cosine", "optcosine"):
        raise ValueError(f"kernel='{kernel_name}' currently supports only 1D samples")

    data_cov = _weighted_covariance(samples, weights, xp)

    bandwidth_vec = _as_bandwidth_per_feature(
        self.bandwidth_per_feature, n_features, xp, ref_arr=data_cov
    )
    if metric_name != "diagonal" and bandwidth_vec is not None:
        raise ValueError("bandwidth_per_feature requires kernel_metric='diagonal'")

    # ---- kernel covariance -------------------------------------------
    bw_result = None
    if bandwidth_vec is not None:
        # Explicit per-feature bandwidths define the covariance directly.  The
        # reported factor is the mean ratio of bandwidth to per-feature
        # standard deviation, with 1.0 substituted when that is unusable.
        tiny = float(np.finfo(np.float64).tiny)
        diag_sd = xp.sqrt(xp_maximum(xp.diag(data_cov), tiny, xp))
        factor = float(_to_float_scalar(xp.mean(bandwidth_vec / diag_sd)))
        if not (np.isfinite(factor) and factor > 0.0):
            factor = 1.0
        scaled_cov = xp.diag(bandwidth_vec * bandwidth_vec)
    else:
        if isinstance(self.bandwidth, str):
            bw_result = select_bandwidth(
                self.bandwidth,
                n_eff=n_eff,
                n_features=n_features,
                samples_2d=samples,
                weights_1d=weights,
                data_cov=data_cov,
                xp=xp,
                enable_r_selectors=True,
                estimator="kernel_regression",
                targets=targets,
                regression=regression_name,
                kernel=kernel_name,
            )
            factor = float(bw_result.factor)
        else:
            factor = float(self.bandwidth)
        if not (np.isfinite(factor) and factor > 0.0):
            raise ValueError("bandwidth factor must be a finite positive scalar")

        if metric_name == "full":
            base_cov = data_cov
        else:
            # Diagonal metric: keep only the per-feature variances.
            base_cov = xp.diag(xp.diag(data_cov))
        scaled_cov = base_cov * (factor**2)

    inv_cov, _, stable_cov = _stable_inv_and_det(scaled_cov, xp)
    # Weighted target mean; prediction uses it where kernel mass is negligible.
    target_mean = xp.sum(targets * weights[:, None], axis=0)

    # ---- fitted state ------------------------------------------------
    self.samples_ = samples
    self.targets_ = targets
    self.weights_ = weights
    self.bandwidth_factor_ = factor
    self.bandwidth_info_ = bw_result
    self.covariance_ = stable_cov
    self.inv_covariance_ = inv_cov
    self.kernel_ = kernel_name
    self.kernel_metric_ = metric_name
    self.bandwidth_per_feature_ = bandwidth_vec
    self.n_features_ = n_features
    self.n_targets_ = n_targets
    self.n_samples_ = n_samples
    self.backend_ = backend_name
    self.regression_ = regression_name
    self.target_mean_ = target_mean
    self.target_was_1d_ = target_was_1d

    # Cache reusable terms for prediction hot paths: S @ inv_cov and the
    # per-sample quadratic form s_i^T inv_cov s_i.
    samples_proj = samples @ inv_cov
    self._samples_proj_ = samples_proj
    self._samples_quad_ = xp.sum(samples_proj * samples, axis=1)

    # ---- local-linear caches -----------------------------------------
    # Up to 24 features the flattened outer products are precomputed (an
    # (n_samples, n_features**2) array); beyond that prediction solves one
    # system per query point and only needs a column of ones.
    use_vectorized = bool(n_features <= 24)
    self._ll_use_vectorized_moments_ = use_vectorized
    self._ll_sample_xx_flat_ = None
    self._ll_sample_xy_flat_ = None
    self._ll_eye_p1_ = None
    self._ll_ones_col_ = None
    if regression_name == "local_linear":
        if use_vectorized:
            xx = samples[:, :, None] * samples[:, None, :]
            self._ll_sample_xx_flat_ = xx.reshape(n_samples, n_features * n_features)
            xy = samples[:, :, None] * targets[:, None, :]
            self._ll_sample_xy_flat_ = xy.reshape(n_samples, n_features * n_targets)
            self._ll_eye_p1_ = xp_eye(n_features + 1, xp.float64, xp, ref_arr=samples)
        else:
            self._ll_ones_col_ = xp_ones((n_samples, 1), xp.float64, xp, ref_arr=samples)

    self._fitted = True
    return self
183
+
184
+ def _require_fitted(self) -> None:
185
+ if not self._fitted:
186
+ raise RuntimeError("Model not fitted. Call fit() first.")
187
+
188
+ def _cleanup_cuda_memory(self):
189
+ if not self.gpu_memory_cleanup:
190
+ return
191
+ try:
192
+ import cupy as cp
193
+ cp.get_default_memory_pool().free_all_blocks()
194
+ cp.get_default_pinned_memory_pool().free_all_blocks()
195
+ except Exception:
196
+ pass
197
+
198
+ def _cleanup_torch_memory(self):
199
+ if not self.gpu_memory_cleanup:
200
+ return
201
+ try:
202
+ import torch
203
+ torch.cuda.empty_cache()
204
+ torch.cuda.synchronize()
205
+ except Exception:
206
+ pass
207
+
208
+ def __del__(self):
209
+ try:
210
+ self._cleanup_cuda_memory()
211
+ self._cleanup_torch_memory()
212
+ except Exception:
213
+ pass
214
+
215
def _evaluate_nadaraya_watson(
    self,
    points_2d,
    *,
    batch_size: int,
    min_effective_weight: float,
    xp,
):
    """Evaluate the Nadaraya-Watson estimate at ``points_2d`` in batches.

    For each query point the prediction is the kernel- and sample-weighted
    average of the training targets.  Where the total weighted kernel mass
    does not exceed the threshold, the fitted weighted target mean is
    returned instead.

    Parameters
    ----------
    points_2d : array
        Query points; indexed as ``(n_points, n_features)``.
    batch_size : int
        Number of query points processed per iteration.
    min_effective_weight : float
        Kernel-mass threshold; raised to at least the smallest positive
        normal float64.
    xp : module
        Array namespace matching the fitted backend.

    Returns
    -------
    array of shape ``(n_points, n_targets)``

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``min_effective_weight`` is not
        a finite positive number.
    """
    step = int(batch_size)
    if step <= 0:
        raise ValueError("batch_size must be a positive integer")

    min_weight = float(min_effective_weight)
    if (not np.isfinite(min_weight)) or min_weight <= 0.0:
        raise ValueError("min_effective_weight must be a finite positive scalar")
    min_weight = max(min_weight, float(np.finfo(np.float64).tiny))

    samples_2d = self.samples_
    targets_2d = self.targets_
    weights_row = self.weights_[None, :]
    inv_cov = self.inv_covariance_
    fallback_row = self.target_mean_[None, :]
    kernel_name = self.kernel_

    n_points = int(points_2d.shape[0])
    n_features = int(samples_2d.shape[1])
    n_targets = int(targets_2d.shape[1])

    out = xp_empty((n_points, n_targets), xp.float64, xp, ref_arr=points_2d)

    def _store_batch(start, stop, kernels):
        # Shared tail of every branch: weighted average with a guarded
        # denominator, replaced by the fallback where kernel mass is too small.
        weighted_kernels = kernels * weights_row
        denom = xp.sum(weighted_kernels, axis=1)
        numer = weighted_kernels @ targets_2d
        denom_safe = xp_maximum(denom, min_weight, xp)
        pred = numer / denom_safe[:, None]
        out[start:stop] = xp.where(denom[:, None] > min_weight, pred, fallback_row)

    if n_features == 1:
        # Scalar fast path: the quadratic form is (q - s)**2 * inv_cov[0, 0].
        samples_row = samples_2d[:, 0][None, :]
        inv_scalar = inv_cov[0, 0]
        is_gaussian = kernel_name == "gaussian"
        if is_gaussian:
            scale = -0.5 * inv_scalar

        for start in range(0, n_points, step):
            stop = min(start + step, n_points)
            diff = points_2d[start:stop, 0][:, None] - samples_row
            if is_gaussian:
                # exp(-0.5 * quad) computed in place to avoid extra temporaries.
                diff *= diff
                diff *= scale
                xp.exp(diff, out=diff)
                kernels = diff
            else:
                quad = (diff * diff) * inv_scalar
                kernels = _kernel_values_from_quad(quad, kernel_name, xp)
            _store_batch(start, stop, kernels)
        return out

    s_quad = self._samples_quad_
    for start in range(0, n_points, step):
        stop = min(start + step, n_points)
        q = points_2d[start:stop]

        # (q - s)^T inv_cov (q - s) expanded as q'Cq + s'Cs - 2 q'Cs; the
        # clamp removes small negatives produced by cancellation.
        q_proj = q @ inv_cov
        q_quad = xp.sum(q_proj * q, axis=1)
        cross = q_proj @ samples_2d.T
        quad = q_quad[:, None] + s_quad[None, :] - 2.0 * cross
        quad = xp_maximum(quad, 0.0, xp)

        kernels = _kernel_values_from_quad(quad, kernel_name, xp)
        _store_batch(start, stop, kernels)

    return out
312
+
313
def _evaluate_local_linear(
    self,
    points_2d,
    *,
    batch_size: int,
    min_effective_weight: float,
    xp,
):
    """Evaluate the local-linear estimate at ``points_2d`` in batches.

    For every query point a weighted least-squares line/plane is fitted to
    the training data centred at that point, and its intercept is the
    prediction.  Three fallbacks apply, in this order of precedence:

    * kernel mass not above the threshold -> fitted weighted target mean;
    * local system unusable (near-zero determinant in 1-D, failed or
      non-finite solve otherwise) -> Nadaraya-Watson value;
    * otherwise -> local-linear intercept.

    Parameters
    ----------
    points_2d : array
        Query points; indexed as ``(n_points, n_features)``.
    batch_size : int
        Number of query points processed per iteration.
    min_effective_weight : float
        Kernel-mass threshold; raised to at least the smallest positive
        normal float64.
    xp : module
        Array namespace matching the fitted backend.

    Returns
    -------
    array of shape ``(n_points, n_targets)``

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive or ``min_effective_weight`` is not
        a finite positive number.
    """
    step = int(batch_size)
    if step <= 0:
        raise ValueError("batch_size must be a positive integer")

    min_weight = float(min_effective_weight)
    if (not np.isfinite(min_weight)) or min_weight <= 0.0:
        raise ValueError("min_effective_weight must be a finite positive scalar")
    min_weight = max(min_weight, float(np.finfo(np.float64).tiny))

    samples_2d = self.samples_
    targets_2d = self.targets_
    weights_1d = self.weights_
    inv_cov = self.inv_covariance_
    fallback = self.target_mean_
    kernel_name = self.kernel_

    n_points = int(points_2d.shape[0])
    n_features = int(samples_2d.shape[1])
    n_targets = int(targets_2d.shape[1])

    out = xp_empty((n_points, n_targets), xp.float64, xp, ref_arr=points_2d)

    if n_features == 1:
        # ---- 1-D closed form -----------------------------------------
        samples_1d = samples_2d[:, 0]
        inv_scalar = inv_cov[0, 0]
        is_gaussian = kernel_name == "gaussian"
        if is_gaussian:
            scale = -0.5 * inv_scalar
        det_thresh = min_weight * min_weight

        for start in range(0, n_points, step):
            stop = min(start + step, n_points)
            delta = points_2d[start:stop, 0][:, None] - samples_1d[None, :]

            if is_gaussian:
                # exp(-0.5 * quad) built in place on a fresh buffer; ``delta``
                # itself is still needed for the moments below.
                kernels = delta * delta
                kernels *= scale
                xp.exp(kernels, out=kernels)
            else:
                quad = (delta * delta) * inv_scalar
                kernels = _kernel_values_from_quad(quad, kernel_name, xp)

            weighted_kernels = kernels * weights_1d[None, :]
            weighted_delta = weighted_kernels * delta

            # Weighted moments of the centred design and targets.
            s0 = xp.sum(weighted_kernels, axis=1)
            s1 = xp.sum(weighted_delta, axis=1)
            s2 = xp.sum(weighted_delta * delta, axis=1)
            t0 = weighted_kernels @ targets_2d
            t1 = weighted_delta @ targets_2d

            # Intercept of the 2x2 normal equations via Cramer's rule.
            det = s0 * s2 - s1 * s1
            use_ll = (s0 > min_weight) & (xp.abs(det) > det_thresh)
            det_safe = xp.where(use_ll, det, 1.0)
            pred_ll = (s2[:, None] * t0 - s1[:, None] * t1) / det_safe[:, None]

            denom_safe = xp_maximum(s0, min_weight, xp)
            pred_nw = t0 / denom_safe[:, None]

            pred = xp.where(use_ll[:, None], pred_ll, pred_nw)
            out[start:stop] = xp.where(s0[:, None] > min_weight, pred, fallback[None, :])

        return out

    # ---- multi-dimensional case --------------------------------------
    s_quad = self._samples_quad_
    use_vectorized_moments = bool(self._ll_use_vectorized_moments_)
    sample_xx_flat = self._ll_sample_xx_flat_
    sample_xy_flat = self._ll_sample_xy_flat_
    eye_p1 = self._ll_eye_p1_
    ones_col = self._ll_ones_col_
    p1 = n_features + 1

    for start in range(0, n_points, step):
        stop = min(start + step, n_points)
        q = points_2d[start:stop]

        # (q - s)^T inv_cov (q - s) expanded as q'Cq + s'Cs - 2 q'Cs; the
        # clamp removes small negatives produced by cancellation.
        q_proj = q @ inv_cov
        q_quad = xp.sum(q_proj * q, axis=1)
        cross = q_proj @ samples_2d.T
        quad = q_quad[:, None] + s_quad[None, :] - 2.0 * cross
        quad = xp_maximum(quad, 0.0, xp)

        kernels = _kernel_values_from_quad(quad, kernel_name, xp)
        weighted_kernels = kernels * weights_1d[None, :]
        denom = xp.sum(weighted_kernels, axis=1)
        numer_nw = weighted_kernels @ targets_2d

        denom_safe = xp_maximum(denom, min_weight, xp)
        pred_nw = numer_nw / denom_safe[:, None]

        if use_vectorized_moments:
            b = int(stop - start)

            # Moments about each query point, obtained from uncentred sums:
            # sum w (x - q) = sum w x - q sum w, and likewise for the
            # second-order and cross terms.
            wx = weighted_kernels @ samples_2d
            s1 = wx - q * denom[:, None]

            s2 = (weighted_kernels @ sample_xx_flat).reshape(b, n_features, n_features)
            q_outer = q[:, :, None] * q[:, None, :]
            s2 = (
                s2
                - wx[:, :, None] * q[:, None, :]
                - q[:, :, None] * wx[:, None, :]
                + denom[:, None, None] * q_outer
            )
            # Re-symmetrise to cancel round-off asymmetry.
            s2 = 0.5 * (s2 + xp.swapaxes(s2, 1, 2))

            t1 = (weighted_kernels @ sample_xy_flat).reshape(b, n_features, n_targets)
            t1 = t1 - q[:, :, None] * numer_nw[:, None, :]

            # Batched normal equations A beta = B with design [1, x - q].
            A_batch = xp_empty((b, p1, p1), xp.float64, xp, ref_arr=weighted_kernels)
            A_batch[:, 0, 0] = denom
            A_batch[:, 0, 1:] = s1
            A_batch[:, 1:, 0] = s1
            A_batch[:, 1:, 1:] = s2

            B_batch = xp_empty((b, p1, n_targets), xp.float64, xp, ref_arr=weighted_kernels)
            B_batch[:, 0, :] = numer_nw
            B_batch[:, 1:, :] = t1

            # torch names the diagonal axes dim1/dim2 instead of axis1/axis2.
            if _torch_dev(A_batch) is not None:
                trace_batch = xp.sum(xp.diagonal(A_batch, dim1=1, dim2=2), axis=1)
            else:
                trace_batch = xp.sum(xp.diagonal(A_batch, axis1=1, axis2=2), axis=1)
            ridge = xp_maximum(trace_batch / float(max(1, p1)) * 1e-10, 1e-10, xp)

            # Up to six solve attempts; each failure adds a ridge to the whole
            # batch, with the increment growing tenfold per attempt.
            beta0 = None
            A_work = A_batch
            ridge_work = ridge
            for _ in range(6):
                try:
                    beta0 = xp.linalg.solve(A_work, B_batch)[:, 0, :]
                    break
                except Exception:
                    A_work = A_work + ridge_work[:, None, None] * eye_p1[None, :, :]
                    ridge_work = ridge_work * 10.0

            if beta0 is not None:
                finite_mask = xp.all(xp.isfinite(beta0), axis=1)
                use_ll = (denom > min_weight) & finite_mask
                pred = xp.where(use_ll[:, None], beta0, pred_nw)
            else:
                pred = pred_nw

            out[start:stop] = xp.where(denom[:, None] > min_weight, pred, fallback[None, :])
            continue

        # Per-point path (no precomputed outer products): one small weighted
        # least-squares system per query point.
        for i in range(int(stop - start)):
            if _to_float_scalar(denom[i]) <= min_weight:
                out[start + i] = fallback
                continue

            wi = weighted_kernels[i]
            Z = xp.concatenate((ones_col, samples_2d - q[i]), axis=1)
            A = Z.T @ (Z * wi[:, None])
            B = Z.T @ (wi[:, None] * targets_2d)

            beta = _solve_linear_system_with_ridge(A, B, xp)
            # Same acceptance rule as the batched path: an intercept that is
            # missing or non-finite is replaced by the Nadaraya-Watson value.
            if beta is None or not bool(xp.all(xp.isfinite(beta[0]))):
                out[start + i] = pred_nw[i]
            else:
                out[start + i] = beta[0]

    return out
521
+
522
def predict(
    self,
    points,
    *,
    batch_size: Optional[int] = None,
    min_effective_weight: Optional[float] = None,
):
    """Predict targets at ``points`` with the fitted regression flavour.

    ``batch_size`` and ``min_effective_weight`` default to the values given
    at construction.  The result is flattened to 1-D when the training
    targets were 1-D; otherwise it has one column per target.

    Raises
    ------
    RuntimeError
        If the model has not been fitted.
    """
    self._require_fitted()
    if batch_size is None:
        batch_size = int(self.batch_size)
    if min_effective_weight is None:
        min_effective_weight = float(self.min_effective_weight)

    xp = _get_xp(self.backend_)
    points_2d = _as_points_2d(points, self.n_features_, xp, ref_arr=self.samples_)

    # Anything other than "local_linear" is evaluated as Nadaraya-Watson.
    if self.regression_ == "local_linear":
        evaluate = self._evaluate_local_linear
    else:
        evaluate = self._evaluate_nadaraya_watson
    preds_2d = evaluate(
        points_2d,
        batch_size=int(batch_size),
        min_effective_weight=float(min_effective_weight),
        xp=xp,
    )

    self._cleanup_cuda_memory()
    self._cleanup_torch_memory()
    return preds_2d.reshape(-1) if self.target_was_1d_ else preds_2d
557
+
558
+ def __call__(
559
+ self,
560
+ points,
561
+ *,
562
+ batch_size: Optional[int] = None,
563
+ min_effective_weight: Optional[float] = None,
564
+ ):
565
+ return self.predict(
566
+ points,
567
+ batch_size=batch_size,
568
+ min_effective_weight=min_effective_weight,
569
+ )
570
+
571
def score(self, X, y):
    """Return the coefficient of determination (R^2) of ``predict(X)`` against ``y``.

    Predictions and targets are flattened before comparison, so for
    multi-target models all outputs are pooled into a single R^2.

    When the targets have zero variance R^2 is undefined; following the
    scikit-learn ``r2_score`` convention the result is ``1.0`` if the
    predictions are exact and ``0.0`` otherwise.  Empty input yields ``0.0``.

    Raises
    ------
    ValueError
        If the flattened predictions and targets differ in length.
    """
    # np.asarray guards against a converter that passes plain sequences
    # through unchanged.
    pred = np.asarray(_to_numpy(self.predict(X)), dtype=np.float64).reshape(-1)
    target = np.asarray(_to_numpy(y), dtype=np.float64).reshape(-1)
    if pred.shape[0] != target.shape[0]:
        raise ValueError("X and y have incompatible lengths")

    ss_res = float(np.sum((target - pred) ** 2))
    y_mean = float(np.mean(target))
    ss_tot = float(np.sum((target - y_mean) ** 2))
    if ss_tot <= 0.0:
        # Constant (or empty) target: avoid dividing by zero.
        perfect = target.size > 0 and ss_res <= 0.0
        return 1.0 if perfect else 0.0
    return 1.0 - (ss_res / ss_tot)
583
+
584
def to_numpy_metadata(self):
    """Return a dict describing the fitted model, with arrays converted via ``_to_numpy``.

    ``bandwidth_selection`` is the ``to_dict()`` output of the stored
    bandwidth-selection result when it provides that method, else ``None``.
    ``bandwidth_per_feature`` is ``None`` unless per-feature bandwidths were
    supplied at fit time.

    Raises
    ------
    RuntimeError
        If the model has not been fitted.
    """
    self._require_fitted()

    selection_info = self.bandwidth_info_
    if hasattr(selection_info, "to_dict"):
        bandwidth_selection = selection_info.to_dict()
    else:
        bandwidth_selection = None

    per_feature = self.bandwidth_per_feature_
    if per_feature is not None:
        per_feature = _to_numpy(per_feature)

    return {
        "bandwidth_factor": float(self.bandwidth_factor_),
        "bandwidth_selection": bandwidth_selection,
        "bandwidth_per_feature": per_feature,
        "n_samples": int(self.n_samples_),
        "n_features": int(self.n_features_),
        "n_targets": int(self.n_targets_),
        "backend": self.backend_,
        "kernel": self.kernel_,
        "kernel_metric": self.kernel_metric_,
        "regression": self.regression_,
        "covariance": _to_numpy(self.covariance_),
        "inv_covariance": _to_numpy(self.inv_covariance_),
        "weights": _to_numpy(self.weights_),
        "target_mean": _to_numpy(self.target_mean_),
    }
609
+
610
+
611
class KernelRegressionRegressor(KernelRegression):
    """Alias of ``KernelRegression`` under an explicit, sklearn-like regressor name.

    The subclass adds no attributes and overrides no methods.
    """
613
+
614
+
615
def fit_kernel_regression(
    samples,
    targets,
    *,
    bandwidth: Union[str, float, int] = "scott",
    weights=None,
    kernel: str = "gaussian",
    regression: str = "nw",
    kernel_metric: str = "full",
    bandwidth_per_feature=None,
    backend: str = "auto",
) -> KernelRegression:
    """Fit a kernel regressor (Nadaraya-Watson or local-linear).

    Builds a ``KernelRegression`` from the keyword options (all other
    constructor options keep their defaults) and returns the result of
    fitting it on ``samples`` and ``targets``.
    """
    settings = {
        "bandwidth": bandwidth,
        "weights": weights,
        "kernel": kernel,
        "regression": regression,
        "kernel_metric": kernel_metric,
        "bandwidth_per_feature": bandwidth_per_feature,
        "backend": backend,
    }
    return KernelRegression(**settings).fit(samples, targets)
638
+
639
+
640
def kernel_regression_predict(
    samples,
    targets,
    points,
    *,
    bandwidth: Union[str, float, int] = "scott",
    weights=None,
    kernel: str = "gaussian",
    regression: str = "nw",
    kernel_metric: str = "full",
    bandwidth_per_feature=None,
    backend: str = "auto",
    batch_size: int = 1024,
    min_effective_weight: float = 1e-12,
):
    """One-shot kernel regression prediction.

    Fits a model on ``samples``/``targets`` with ``fit_kernel_regression``
    and immediately predicts at ``points``; the fitted model is discarded.
    """
    fit_options = {
        "bandwidth": bandwidth,
        "weights": weights,
        "kernel": kernel,
        "regression": regression,
        "kernel_metric": kernel_metric,
        "bandwidth_per_feature": bandwidth_per_feature,
        "backend": backend,
    }
    fitted = fit_kernel_regression(samples, targets, **fit_options)
    return fitted.predict(
        points,
        batch_size=batch_size,
        min_effective_weight=min_effective_weight,
    )
672
+
673
+
674
def _as_targets_2d(targets, n_samples: int, xp, ref_arr=None):
    """Convert ``targets`` to a float64 2-D array plus a flag for 1-D input.

    Returns
    -------
    (array, bool)
        The targets with one row per sample (a 1-D input becomes a single
        column) and ``True`` when the input was 1-D.

    Raises
    ------
    ValueError
        If the array is neither 1-D nor 2-D, or its first dimension differs
        from ``n_samples``.
    """
    arr = xp_asarray(targets, dtype=xp.float64, xp=xp, ref_arr=ref_arr)

    if arr.ndim not in (1, 2):
        raise ValueError("targets must be 1D or 2D")
    if int(arr.shape[0]) != int(n_samples):
        raise ValueError("targets must have the same number of rows as samples")

    if arr.ndim == 1:
        return arr.reshape(-1, 1), True
    return arr, False
687
+
688
+
689
+ def _normalize_kernel_metric_name(kernel_metric: str) -> str:
690
+ name = str(kernel_metric).strip().lower()
691
+ aliases = {
692
+ "full": "full",
693
+ "full_covariance": "full",
694
+ "full-covariance": "full",
695
+ "covariance": "full",
696
+ "diag": "diagonal",
697
+ "diagonal": "diagonal",
698
+ "axis_aligned": "diagonal",
699
+ "axis-aligned": "diagonal",
700
+ }
701
+ normalized = aliases.get(name)
702
+ if normalized is None:
703
+ raise ValueError("kernel_metric must be one of: 'full', 'diagonal'")
704
+ return normalized
705
+
706
+
707
+ def _as_bandwidth_per_feature(bandwidth_per_feature, n_features: int, xp, ref_arr=None):
708
+ if bandwidth_per_feature is None:
709
+ return None
710
+
711
+ bw = xp_asarray(bandwidth_per_feature, dtype=xp.float64, xp=xp, ref_arr=ref_arr).reshape(-1)
712
+ if int(bw.size) == 1 and int(n_features) > 1:
713
+ bw = xp_full(int(n_features), _to_float_scalar(bw[0]), xp.float64, xp, ref_arr=ref_arr)
714
+
715
+ if int(bw.size) != int(n_features):
716
+ raise ValueError("bandwidth_per_feature must match sample feature dimension")
717
+ if _to_float_scalar(xp.sum(~xp.isfinite(bw))) > 0.0:
718
+ raise ValueError("bandwidth_per_feature must contain only finite values")
719
+ if _to_float_scalar(xp.min(bw)) <= 0.0:
720
+ raise ValueError("bandwidth_per_feature must be strictly positive")
721
+
722
+ return bw
723
+
724
+
725
def _solve_linear_system_with_ridge(A, B, xp):
    """Solve ``A x = B``, adding an escalating ridge to ``A`` when the solve fails.

    The first attempt uses ``A`` unchanged.  After each failed attempt
    ``ridge * I`` is added to the working matrix and ``ridge`` is multiplied
    by ten; the initial ridge is ``1e-10`` times the mean diagonal of ``A``
    (never below ``1e-10``).  An attempt counts as failed when the backend
    raises *or* when the returned solution contains non-finite values, since
    some backends return NaN/inf for singular systems instead of raising.

    Returns
    -------
    array or None
        The solution, or ``None`` if all six attempts fail.
    """
    p1 = int(A.shape[0])
    eye = xp_eye(p1, xp.float64, xp, ref_arr=A)

    trace = _to_float_scalar(xp.trace(A))
    base = trace / float(max(1, p1)) if np.isfinite(trace) else 1.0
    ridge = max(base * 1e-10, 1e-10)

    A_work = A
    for _ in range(6):
        try:
            solution = xp.linalg.solve(A_work, B)
        except Exception:
            # Backends signal singular systems with different exception
            # types, so the catch is intentionally broad.
            solution = None
        if solution is not None and bool(xp.all(xp.isfinite(solution))):
            return solution
        A_work = A_work + ridge * eye
        ridge *= 10.0
    return None
741
+
742
+
743
# Names exported by ``from <module> import *``: the estimator, its alias, and
# the two functional entry points.
__all__ = [
    "KernelRegression",
    "KernelRegressionRegressor",
    "fit_kernel_regression",
    "kernel_regression_predict",
]