statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cross-validated Kernel Ridge Regression with GPU acceleration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Union
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from statgpu._base import BaseEstimator
|
|
12
|
+
from statgpu._config import Device
|
|
13
|
+
from statgpu.backends import _to_numpy, _torch_dev, xp_zeros, xp_astype
|
|
14
|
+
|
|
15
|
+
from statgpu.nonparametric.kernel_methods._kernels import pairwise_kernels
|
|
16
|
+
from statgpu.nonparametric.kernel_methods._krr import KernelRidge
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _kfold_indices(n_samples: int, n_splits: int, random_state: Optional[int] = None):
|
|
20
|
+
"""Generate K-fold train/test index arrays."""
|
|
21
|
+
rng = np.random.RandomState(random_state)
|
|
22
|
+
indices = np.arange(n_samples)
|
|
23
|
+
rng.shuffle(indices)
|
|
24
|
+
fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=np.int64)
|
|
25
|
+
fold_sizes[: n_samples % n_splits] += 1
|
|
26
|
+
current = 0
|
|
27
|
+
folds = []
|
|
28
|
+
for fold_size in fold_sizes:
|
|
29
|
+
start, stop = current, current + fold_size
|
|
30
|
+
test_idx = indices[start:stop]
|
|
31
|
+
train_idx = np.concatenate([indices[:start], indices[stop:]])
|
|
32
|
+
folds.append((train_idx, test_idx))
|
|
33
|
+
current = stop
|
|
34
|
+
return folds
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class KernelRidgeCV(BaseEstimator):
    r"""Cross-validated Kernel Ridge Regression.

    The full kernel matrix is computed once. For every cross-validation fold
    the training sub-kernel is eigendecomposed,
    :math:`K_{\text{train}} = Q \Lambda Q^\top`, and the dual coefficients for
    every regularization strength :math:`\lambda` in the grid are obtained
    from that single decomposition:

    .. math::
        \boldsymbol{\alpha}(\lambda)
        = Q \, \text{diag}\!\left(\frac{1}{\lambda_i + \lambda}\right)
        Q^\top \mathbf{y}

    This avoids re-solving the linear system for every alpha value.

    Parameters
    ----------
    alphas : array-like, optional
        Regularization strengths to try. If ``None``, a log-spaced grid
        of 100 values is generated automatically from the eigenvalues of
        the full kernel matrix.
    cv : int, default=5
        Number of cross-validation folds.
    kernel : str or callable, default='rbf'
        Kernel metric name or callable.
    gamma : float, optional
        Kernel coefficient. Defaults to ``1 / n_features``.
    degree : int, default=3
        Degree for polynomial kernel.
    coef0 : float, default=1
        Independent term for polynomial and sigmoid kernels.
    kernel_params : dict, optional
        Additional parameters for the kernel function.
    random_state : int, optional
        Random state for fold generation.
    device : str or Device, default='auto'
        Computation device.
    n_jobs : int, optional
        Not used; kept for API compatibility.

    Attributes
    ----------
    alpha_ : float
        Best regularization parameter found by cross-validation.
    best_score_ : float
        R^2 estimate for ``alpha_``, computed as ``1 - MSE_cv / Var(y)``
        per target from the fold-averaged MSE and then averaged over targets.
    cv_results_ : dict
        Detailed cross-validation results with keys ``"alphas"``,
        ``"mean_mse"``, ``"mse_table"``, ``"best_alpha"`` and
        ``"best_score"``.
    estimator_ : KernelRidge
        Fitted KernelRidge model with the best alpha.
    dual_coef_ : ndarray
        Dual coefficients of the fitted model (shortcut).
    X_fit_ : ndarray
        Training data (shortcut).
    """

    # Constructor arguments handled by this class (the base class handles
    # ``device`` and ``n_jobs``).
    _CV_PARAM_NAMES = (
        "alphas",
        "cv",
        "kernel",
        "gamma",
        "degree",
        "coef0",
        "kernel_params",
        "random_state",
    )

    def __init__(
        self,
        alphas: Optional[np.ndarray] = None,
        cv: int = 5,
        kernel: Union[str, callable] = "rbf",
        gamma: Optional[float] = None,
        degree: int = 3,
        coef0: float = 1,
        kernel_params: Optional[dict] = None,
        random_state: Optional[int] = None,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
    ):
        super().__init__(device=device, n_jobs=n_jobs)
        self.alphas = alphas
        self.cv = cv
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params
        self.random_state = random_state

        # Fitted attributes
        self.alpha_ = None
        self.best_score_ = None
        self.cv_results_ = None
        self.estimator_ = None
        self.dual_coef_ = None
        self.X_fit_ = None

    def _get_kernel_params(self):
        """Collect the keyword arguments forwarded to ``pairwise_kernels``.

        Starts from a copy of ``kernel_params`` and then overlays the
        estimator-level ``gamma``, ``degree`` and ``coef0`` for the named
        kernels that use them. ``degree`` and ``coef0`` are forwarded only
        when they differ from this class's defaults (3 and 1).

        Returns
        -------
        params : dict
            Keyword arguments for the kernel function.
        """
        params = dict(self.kernel_params) if self.kernel_params is not None else {}

        # Callables get only the user-supplied kernel_params.
        if not isinstance(self.kernel, str):
            return params
        name = self.kernel.strip().lower()

        if name in ("rbf", "gaussian", "polynomial", "poly", "laplacian", "sigmoid"):
            if self.gamma is not None:
                params["gamma"] = self.gamma

        if name in ("polynomial", "poly", "sigmoid"):
            # NOTE(review): skipping default values presumably relies on the
            # kernel functions using the same defaults (degree=3, coef0=1).
            if self.degree != 3 and name in ("polynomial", "poly"):
                params["degree"] = self.degree
            if self.coef0 != 1:
                params["coef0"] = self.coef0
        return params

    def _generate_alpha_grid(self, eigvals, n_alphas: int = 100):
        """Generate log-spaced alpha grid based on eigenvalue range.

        The grid runs from ``max(lambda_min * 1e-3, 1e-8)`` to
        ``max(lambda_max * 10, 1)``, where ``lambda_min`` is the smallest
        eigenvalue above ``1e-12`` (``1e-6`` if there is none).

        Parameters
        ----------
        eigvals : ndarray
            Eigenvalues of the kernel matrix.
        n_alphas : int, default=100
            Number of grid points.

        Returns
        -------
        alphas : ndarray
            Log-spaced alpha values (float64, ascending).
        """
        eig_np = _to_numpy(eigvals).ravel()
        lambda_max = float(np.max(eig_np))

        # Ignore numerically-zero (or negative) eigenvalues for the lower end.
        positive = eig_np[eig_np > 1e-12]
        lambda_min = float(np.min(positive)) if positive.size else 1e-6

        alpha_max = max(lambda_max * 10.0, 1.0)
        alpha_min = max(lambda_min * 1e-3, 1e-8)

        # Defensive guard: keep the range non-degenerate.
        if alpha_min >= alpha_max:
            alpha_min = alpha_max * 1e-4

        return np.logspace(
            np.log10(alpha_min),
            np.log10(alpha_max),
            num=n_alphas,
            dtype=np.float64,
        )

    def fit(self, X, y):
        """Fit KernelRidgeCV model.

        Computes the kernel matrix once, evaluates every alpha on every
        fold, selects the alpha with the lowest fold-averaged MSE (averaged
        over targets for multi-target ``y``) and refits a ``KernelRidge``
        on the full data with that alpha.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples, if ``cv``
            is not in ``[2, n_samples]``, or if ``alphas`` is empty.
        """
        # Resolve backend
        backend = self._get_backend()
        xp = backend.xp

        X_arr = xp_astype(self._to_array(X), xp.float64, xp)
        y_arr = xp_astype(self._to_array(y), xp.float64, xp)
        if y_arr.ndim == 1:
            y_arr = y_arr.reshape(-1, 1)

        n_samples = X_arr.shape[0]
        n_targets = y_arr.shape[1]
        if y_arr.shape[0] != n_samples:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: "
                f"{n_samples} != {y_arr.shape[0]}."
            )

        n_folds = int(self.cv)
        if n_folds < 2 or n_folds > n_samples:
            raise ValueError(
                f"cv must satisfy 2 <= cv <= n_samples; got cv={n_folds} "
                f"with n_samples={n_samples}."
            )

        # Compute full kernel matrix once; folds slice sub-blocks out of it.
        kernel_params = self._get_kernel_params()
        K = pairwise_kernels(X_arr, X_arr, metric=self.kernel, xp=xp, **kernel_params)

        # The spectrum of the full kernel is only needed to build a default
        # grid, so skip the O(n^3) decomposition when alphas are supplied.
        if self.alphas is None:
            eigvals, _ = xp.linalg.eigh(K)
            alphas_np = self._generate_alpha_grid(eigvals)
        else:
            alphas_np = np.asarray(self.alphas, dtype=np.float64).ravel()
        n_alphas = alphas_np.shape[0]
        if n_alphas == 0:
            raise ValueError("alphas must contain at least one value.")

        folds = _kfold_indices(n_samples, n_folds, random_state=self.random_state)

        # mse_table: (n_alphas, n_folds, n_targets)
        mse_table = xp_zeros((n_alphas, n_folds, n_targets), xp.float64, xp, X_arr)

        # Detect a CUDA torch tensor by duck typing (CuPy arrays expose .get).
        _is_torch = hasattr(K, 'device') and hasattr(K, 'is_cuda') and not hasattr(K, 'get')
        use_torch_cuda = _is_torch and K.is_cuda

        # The alpha grid on the compute device is the same for every fold.
        if use_torch_cuda:
            import torch
            alphas_dev = torch.asarray(alphas_np, dtype=torch.float64, device=K.device)
        else:
            alphas_dev = xp.asarray(alphas_np, dtype=xp.float64)

        for fi, (train_idx, test_idx) in enumerate(folds):
            K_train = K[train_idx][:, train_idx]
            K_test = K[test_idx][:, train_idx]
            y_train = y_arr[train_idx]
            y_test = y_arr[test_idx]

            if use_torch_cuda:
                # Full GPU path: eigendecomposition + batched alpha sweep
                fold_eigvals, fold_Q = torch.linalg.eigh(K_train)
                Qt_y_fold = fold_Q.T @ y_train  # (n_train, n_targets)

                # 1 / (lambda_i + alpha) for every alpha: (n_alphas, n_train)
                inv_diag = 1.0 / (fold_eigvals[None, :] + alphas_dev[:, None])

                # dual_coefs: (n_alphas, n_train, n_targets)
                weighted = inv_diag[:, :, None] * Qt_y_fold[None, :, :]
                dual_coefs = fold_Q @ weighted

                # Predict: K_test @ dual_coefs -> (n_alphas, n_test, n_targets)
                y_pred = torch.matmul(K_test.unsqueeze(0), dual_coefs)
                residuals = y_pred - y_test[None, :, :]
                mse_vals = torch.mean(residuals ** 2, dim=1)  # (n_alphas, n_targets)
            else:
                # Generic path (everything that is not a CUDA torch tensor)
                fold_eigvals, fold_Q = xp.linalg.eigh(K_train)
                Qt_y_fold = fold_Q.T @ y_train  # (n_train, n_targets)

                # 1 / (lambda_i + alpha) for every alpha: (n_alphas, n_train)
                inv_diag = 1.0 / (fold_eigvals[None, :] + alphas_dev[:, None])

                # weighted[a, n, t] = inv_diag[a, n] * Qt_y_fold[n, t]
                weighted = inv_diag[:, :, None] * Qt_y_fold[None, :, :]

                # dual_coefs[a, m, t] = sum_n Q[m, n] * weighted[a, n, t]
                dual_coefs = xp.einsum('mn,ant->amt', fold_Q, weighted)

                if n_targets == 1:
                    # Faster path for single target: one plain matmul
                    y_pred = (K_test @ dual_coefs[:, :, 0].T).T  # (a, n_test)
                    residuals = y_pred - y_test.ravel()[None, :]
                    mse_vals = xp.mean(residuals ** 2, axis=1, keepdims=True)  # (a, 1)
                else:
                    # y_pred[a, t, k] = sum_m K_test[t, m] * dual_coefs[a, m, k]
                    y_pred = xp.einsum('tm,amk->atk', K_test, dual_coefs)
                    residuals = y_pred - y_test[None, :, :]
                    mse_vals = xp.mean(residuals ** 2, axis=1)  # (a, n_targets)

            mse_table[:, fi, :] = mse_vals

        # Mean MSE across folds: (n_alphas, n_targets)
        mean_mse = xp.mean(mse_table, axis=1)

        # Select the alpha minimising the MSE (averaged over targets if several).
        if n_targets == 1:
            selection_curve = mean_mse[:, 0]
        else:
            selection_curve = xp.mean(mean_mse, axis=1)
        best_idx = int(xp.argmin(selection_curve).item())

        self.alpha_ = float(alphas_np[best_idx])

        # R^2 for the best alpha. The variance is taken per target (population
        # variance), so differences between target means do not inflate the
        # denominator; per-target R^2 values are then averaged. Targets with
        # zero variance contribute 0.0.
        centered = y_arr - xp.mean(y_arr, axis=0)
        y_var_np = np.asarray(
            _to_numpy(xp.mean(centered ** 2, axis=0)), dtype=np.float64
        ).ravel()
        mse_best_np = np.asarray(
            _to_numpy(mean_mse[best_idx]), dtype=np.float64
        ).ravel()
        r2_per_target = np.zeros_like(mse_best_np)
        has_variance = y_var_np > 0
        r2_per_target[has_variance] = (
            1.0 - mse_best_np[has_variance] / y_var_np[has_variance]
        )
        self.best_score_ = float(np.mean(r2_per_target))

        # Build cv_results_
        self.cv_results_ = {
            "alphas": alphas_np,
            "mean_mse": _to_numpy(mean_mse),
            "mse_table": _to_numpy(mse_table),
            "best_alpha": self.alpha_,
            "best_score": self.best_score_,
        }

        # Refit with best alpha on the full data
        self.estimator_ = KernelRidge(
            alpha=self.alpha_,
            kernel=self.kernel,
            gamma=self.gamma,
            degree=self.degree,
            coef0=self.coef0,
            kernel_params=self.kernel_params,
            device=self.device,
        )
        self.estimator_.fit(X, y)

        # Shortcut attributes
        self.dual_coef_ = self.estimator_.dual_coef_
        self.X_fit_ = self.estimator_.X_fit_
        self._xp = self.estimator_._xp

        self._fitted = True
        return self

    def predict(self, X):
        """Predict using the best Kernel Ridge model.

        Parameters
        ----------
        X : array-like of shape (n_samples_test, n_features)

        Returns
        -------
        y_pred : ndarray
            Output of ``estimator_.predict(X)``.
        """
        self._check_is_fitted()
        return self.estimator_.predict(X)

    def score(self, X, y):
        """Return R^2 score using the best Kernel Ridge model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, n_targets)

        Returns
        -------
        score : float
            Output of ``estimator_.score(X, y)``.
        """
        self._check_is_fitted()
        return self.estimator_.score(X, y)

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Returns the base-class parameters extended with the
        cross-validation and kernel settings of this class.
        """
        params = super().get_params(deep=deep)
        params.update({name: getattr(self, name) for name in self._CV_PARAM_NAMES})
        return params

    def set_params(self, **params):
        """Set parameters for this estimator.

        All parameters are first passed to the base class; the ones owned by
        this class are then assigned directly as attributes.
        """
        super().set_params(**params)
        for key in self._CV_PARAM_NAMES:
            if key in params:
                setattr(self, key, params[key])
        return self
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Kernel smoothing methods: KDE and Nadaraya-Watson kernel regression."""
|
|
2
|
+
|
|
3
|
+
from ._bandwidth_selection import (
|
|
4
|
+
BandwidthSelectionResult,
|
|
5
|
+
select_bandwidth,
|
|
6
|
+
select_bandwidth_factor,
|
|
7
|
+
)
|
|
8
|
+
from ._kde import (
|
|
9
|
+
KernelDensityEstimator,
|
|
10
|
+
KDE,
|
|
11
|
+
KDEBootstrapResult,
|
|
12
|
+
fit_kde,
|
|
13
|
+
kde_pdf,
|
|
14
|
+
kde_confidence_interval,
|
|
15
|
+
kde_bootstrap_confidence_interval,
|
|
16
|
+
)
|
|
17
|
+
from ._kernel_regression import (
|
|
18
|
+
KernelRegression,
|
|
19
|
+
KernelRegressionRegressor,
|
|
20
|
+
fit_kernel_regression,
|
|
21
|
+
kernel_regression_predict,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"BandwidthSelectionResult",
|
|
26
|
+
"select_bandwidth",
|
|
27
|
+
"select_bandwidth_factor",
|
|
28
|
+
"KernelDensityEstimator",
|
|
29
|
+
"KDE",
|
|
30
|
+
"KDEBootstrapResult",
|
|
31
|
+
"fit_kde",
|
|
32
|
+
"kde_pdf",
|
|
33
|
+
"kde_confidence_interval",
|
|
34
|
+
"kde_bootstrap_confidence_interval",
|
|
35
|
+
"KernelRegression",
|
|
36
|
+
"KernelRegressionRegressor",
|
|
37
|
+
"fit_kernel_regression",
|
|
38
|
+
"kernel_regression_predict",
|
|
39
|
+
]
|