statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""Limited-memory BFGS solver for smooth penalised objectives.
|
|
2
|
+
|
|
3
|
+
Generic solver -- works with any loss that implements fused_value_and_gradient().
|
|
4
|
+
Keeps parameters, gradients, and curvature history on the input backend.
|
|
5
|
+
GPU-optimised path uses:
|
|
6
|
+
- loss.fused_value_and_gradient to avoid redundant X@coef
|
|
7
|
+
- _dot_dev / _norm2_dev to stay on device
|
|
8
|
+
- _sync_scalars to batch GPU-to-CPU transfers
|
|
9
|
+
- _device_leq for device-side line search
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
__all__ = ["lbfgs_solver"]
|
|
15
|
+
|
|
16
|
+
import warnings
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from statgpu.backends import _resolve_backend
|
|
20
|
+
from statgpu.backends._array_ops import (
|
|
21
|
+
_copy_arr,
|
|
22
|
+
_device_gt,
|
|
23
|
+
_device_leq,
|
|
24
|
+
_dot_dev,
|
|
25
|
+
_norm2_dev,
|
|
26
|
+
_sync_scalars,
|
|
27
|
+
_zeros,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from ._convergence import ConvergenceWarning
|
|
31
|
+
from ._utils import (
|
|
32
|
+
_smooth_penalty_gradient,
|
|
33
|
+
_smooth_penalty_value_dev,
|
|
34
|
+
_validate_uniform_sample_weight,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def lbfgs_solver(
    loss: "GLMLoss",
    penalty: "Penalty | None",
    X,
    y,
    max_iter: int = 100,
    tol: float = 1e-4,
    init_coef=None,
    history_size: int = 10,
    sample_weight=None,
) -> tuple:
    """Limited-memory BFGS for smooth objectives.

    Works with any loss that implements ``fused_value_and_gradient(X, y, coef)``
    returning ``(value, gradient)``.  Supports numpy / cupy / torch backends
    via auto-detection of *X*.

    Parameters
    ----------
    loss : object
        Loss with ``fused_value_and_gradient(X, y, coef)`` and
        ``preprocess(X, y)`` methods.
    penalty : object or None
        Smooth penalty (l2, elasticnet, none).
    X, y : array-like
        Design matrix and response vector.
    max_iter : int
        Maximum number of L-BFGS iterations.
    tol : float
        Convergence tolerance on gradient norm and step norm.
    init_coef : array-like or None
        Initial coefficient vector.  Zeros if *None*.
    history_size : int
        Number of past (s, y) pairs to store.
    sample_weight : array-like or None
        Sample weights.  Must be uniform (all equal) for this solver.

    Returns
    -------
    params : array
        Optimised coefficient vector.
    n_iter : int
        Number of iterations performed.

    Warns
    -----
    RuntimeWarning
        When the backtracking line search exhausts its 25 trials.
    ConvergenceWarning
        When ``max_iter`` iterations finish without either stopping test
        (gradient norm or step norm below ``tol``) being met.
    """
    backend = _resolve_backend("auto", X)
    X_proc, y_proc = loss.preprocess(X, y)
    n_features = X_proc.shape[1]
    _validate_uniform_sample_weight(sample_weight, X_proc.shape[0], "lbfgs_solver")

    if init_coef is not None:
        params = (
            _copy_arr(init_coef)
            if hasattr(init_coef, "copy") or hasattr(init_coef, "clone")
            else np.array(init_coef).copy()
        )
    else:
        params = _zeros(n_features, backend, ref_tensor=X)

    # Curvature history: s = parameter step, y = gradient change, rho = 1 / (y.s)
    s_hist = []
    y_hist = []
    rho_hist = []

    # Initial gradient (fused to avoid redundant X@coef); the value is not needed here
    _, grad = loss.fused_value_and_gradient(X_proc, y_proc, params)
    grad = grad + _smooth_penalty_gradient(penalty, params)

    # Set when a stopping test fires, so that meeting the tolerance on the very
    # last allowed iteration is not reported as a convergence failure.
    stopped_early = False
    iteration = -1  # default if max_iter=0

    for iteration in range(max_iter):
        grad_norm_dev = _norm2_dev(grad)

        # Two-loop recursion -- all dot products stay on device
        q = _copy_arr(grad)
        alphas = []
        for s_vec, y_vec, rho in reversed(list(zip(s_hist, y_hist, rho_hist))):
            alpha = rho * _dot_dev(s_vec, q)
            alphas.append(alpha)
            q = q - alpha * y_vec

        # Scale the initial inverse-Hessian guess by (s.y)/(y.y) of the newest pair
        if y_hist:
            sy = _dot_dev(s_hist[-1], y_hist[-1])
            yy = _dot_dev(y_hist[-1], y_hist[-1])
            gamma = sy / yy if _device_gt(yy, 1e-30) else 1.0
        else:
            gamma = 1.0
        r = gamma * q

        for s_vec, y_vec, rho, alpha in zip(
            s_hist, y_hist, rho_hist, reversed(alphas)
        ):
            beta = rho * _dot_dev(y_vec, r)
            r = r + s_vec * (alpha - beta)

        direction = -r
        gdd_dev = _dot_dev(grad, direction)

        # Batch sync: grad_norm + grad_dot_dir
        gn, gdd = _sync_scalars(grad_norm_dev, gdd_dev, backend=backend)
        if gn < tol:
            stopped_early = True
            break
        if gdd >= 0:
            # Not a descent direction: fall back to steepest descent.  gn is the
            # (unsquared) gradient norm, so the slope along -grad is -||grad||^2.
            direction = -grad
            gdd = -(gn * gn)

        # Line search -- stays on device
        old_val_dev, _ = loss.fused_value_and_gradient(X_proc, y_proc, params)
        old_val_dev = old_val_dev + _smooth_penalty_value_dev(penalty, params)

        step = 1.0
        params_new = params
        _ls_accepted = False
        for _ in range(25):
            candidate = params + step * direction
            cand_val_dev, _ = loss.fused_value_and_gradient(X_proc, y_proc, candidate)
            cand_val_dev = cand_val_dev + _smooth_penalty_value_dev(penalty, candidate)
            # Device-side comparison -- single sync for the bool
            if _device_leq(cand_val_dev, old_val_dev + 1e-4 * step * gdd):
                params_new = candidate
                _ls_accepted = True
                break
            step *= 0.5
        if not _ls_accepted:
            # params_new is still params here, so the step below is zero.
            warnings.warn(
                "lbfgs_solver: line search failed to find a descent step "
                f"after 25 backtracking steps (iteration {iteration}). "
                "Solver may stagnate.",
                RuntimeWarning,
                stacklevel=2,
            )

        # Update gradient (fused)
        _, grad_new = loss.fused_value_and_gradient(X_proc, y_proc, params_new)
        grad_new = grad_new + _smooth_penalty_gradient(penalty, params_new)

        s_vec = params_new - params
        y_vec = grad_new - grad
        ys_dev = _dot_dev(y_vec, s_vec)
        s_norm_dev = _norm2_dev(s_vec)

        # Batch sync: ys + s_norm
        ys, s_norm = _sync_scalars(ys_dev, s_norm_dev, backend=backend)
        if ys > 1e-12:
            # Only keep pairs with positive curvature; drop the oldest when full
            s_hist.append(s_vec)
            y_hist.append(y_vec)
            rho_hist.append(1.0 / ys)
            if len(s_hist) > history_size:
                s_hist.pop(0)
                y_hist.pop(0)
                rho_hist.pop(0)

        params = params_new
        grad = grad_new
        if s_norm < tol:
            stopped_early = True
            break

    n_iter = iteration + 1
    if not stopped_early:
        warnings.warn(
            f"lbfgs_solver did not converge within {max_iter} iterations "
            f"(loss={getattr(loss, 'name', '?')}, penalty={getattr(penalty, 'name', '?')}).",
            ConvergenceWarning,
            stacklevel=2,
        )
    return params, n_iter
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Newton-Raphson solver with Armijo backtracking line search.
|
|
2
|
+
|
|
3
|
+
Generic solver — works with any loss that implements preprocess(), gradient(), hessian() and fused_value_and_gradient().
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
__all__ = ["newton_solver"]
|
|
9
|
+
|
|
10
|
+
import warnings
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from statgpu.backends import _resolve_backend
|
|
14
|
+
from statgpu.backends._array_ops import (
|
|
15
|
+
_copy_arr,
|
|
16
|
+
_dot_dev,
|
|
17
|
+
_norm2_dev,
|
|
18
|
+
_sync_scalars,
|
|
19
|
+
_zeros,
|
|
20
|
+
_device_leq,
|
|
21
|
+
)
|
|
22
|
+
from statgpu.backends._utils import _to_float_scalar
|
|
23
|
+
|
|
24
|
+
from ._convergence import ConvergenceWarning
|
|
25
|
+
from ._utils import (
|
|
26
|
+
_validate_uniform_sample_weight,
|
|
27
|
+
_smooth_penalty_gradient,
|
|
28
|
+
_smooth_penalty_hessian,
|
|
29
|
+
_smooth_penalty_value_dev,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def newton_solver(
    loss: "GLMLoss",
    penalty: "Penalty | None",
    X,
    y,
    max_iter: int = 100,
    tol: float = 1e-4,
    init_coef=None,
    sample_weight=None,
) -> tuple:
    """Newton-Raphson solver with Armijo backtracking line search.

    Supports numpy / cupy / torch backends via auto-detection of X.

    When the loss sets ``_has_constant_hessian`` to a true value, the
    (loss + penalty) Hessian is computed once at the starting point and
    reused for every iteration; the line search still runs.

    Requires: loss has hessian() and penalty is smooth.

    Parameters
    ----------
    loss : object
        Loss providing ``preprocess``, ``gradient``, ``hessian`` and
        ``fused_value_and_gradient``.
    penalty : object or None
        Smooth penalty, or None for an unpenalised fit.
    X, y : array-like
        Design matrix and response vector.
    max_iter : int
        Maximum number of Newton iterations.
    tol : float
        Stop when the L2 norm of the parameter update falls below this value.
    init_coef : array-like or None
        Initial coefficient vector.  Zeros if None.
    sample_weight : array-like or None
        Sample weights.  Must be uniform (all equal) for this solver.

    Returns
    -------
    params : array
        Optimised coefficient vector.
    n_iter : int
        Number of iterations performed.

    Warns
    -----
    ConvergenceWarning
        When ``max_iter`` iterations finish without the update norm dropping
        below ``tol``.
    """
    backend = _resolve_backend("auto", X)
    X_proc, y_proc = loss.preprocess(X, y)
    n_features = X_proc.shape[1]
    # Validate before any Hessian work, matching lbfgs_solver.
    _validate_uniform_sample_weight(sample_weight, X_proc.shape[0], "newton_solver")

    if init_coef is not None:
        params = (
            _copy_arr(init_coef)
            if hasattr(init_coef, "copy") or hasattr(init_coef, "clone")
            else np.array(init_coef).copy()
        )
    else:
        params = _zeros(n_features, backend, ref_tensor=X_proc)

    # Constant-Hessian detection via loss attribute (generic, not loss-name based)
    _const_hessian = getattr(loss, "_has_constant_hessian", False)

    _fixed_hess = None
    if _const_hessian:
        _fixed_hess = loss.hessian(X_proc, y_proc, params) + _smooth_penalty_hessian(
            penalty, params
        )

    # Set when the step-norm test fires, so that converging on the very last
    # allowed iteration is not reported as a failure.
    stopped_early = False
    iteration = -1  # keeps n_iter == 0 when max_iter == 0

    for iteration in range(max_iter):
        params_old = _copy_arr(params)
        grad = loss.gradient(X_proc, y_proc, params) + _smooth_penalty_gradient(
            penalty, params
        )
        hess = _fixed_hess if _fixed_hess is not None else (
            loss.hessian(X_proc, y_proc, params) + _smooth_penalty_hessian(penalty, params)
        )

        # Solve hess @ direction = grad; fall back to least squares if that fails.
        try:
            if backend == "numpy":
                direction = np.linalg.solve(hess, grad)
            elif backend == "cupy":
                import cupy as cp

                direction = cp.linalg.solve(hess, grad)
            else:
                import torch

                direction = torch.linalg.solve(hess, grad.unsqueeze(1))
                direction = direction.squeeze(1)
        except (np.linalg.LinAlgError, ValueError, RuntimeError):
            if backend == "numpy":
                direction = np.linalg.lstsq(hess, grad, rcond=None)[0]
            elif backend == "cupy":
                import cupy as cp

                direction = cp.linalg.lstsq(hess, grad)[0]
            else:
                import torch

                direction = torch.linalg.lstsq(hess, grad.unsqueeze(1)).solution
                direction = direction.squeeze(1)

        # Armijo backtracking — use loss.fused_value_and_gradient (generic interface)
        obj_old_dev, _ = loss.fused_value_and_gradient(X_proc, y_proc, params_old)
        obj_old_dev = obj_old_dev + _smooth_penalty_value_dev(penalty, params_old)
        gdd_dev = _dot_dev(grad, direction)
        gdd = _to_float_scalar(gdd_dev)
        # The trial point is params_old - step * direction, so the slope along
        # the step is -gdd.  Sufficient decrease therefore means dropping by
        # 1e-4 * step * gdd; abs() keeps the test a strict-decrease requirement
        # even when the solve returns a direction with grad.direction < 0.
        required_decrease = 1e-4 * abs(gdd)

        step = 1.0
        for _bt in range(20):
            params_try = params_old - step * direction
            try:
                obj_try_dev, _ = loss.fused_value_and_gradient(X_proc, y_proc, params_try)
                obj_try_dev = obj_try_dev + _smooth_penalty_value_dev(
                    penalty, params_try
                )
                if _device_leq(obj_try_dev, obj_old_dev - step * required_decrease):
                    params = params_try
                    break
            except (ValueError, RuntimeError, FloatingPointError):
                # A trial point the loss cannot evaluate counts as a rejected step.
                pass
            step *= 0.5
        else:
            # No trial was accepted: take the smallest (already halved) step.
            params = params_old - step * direction

        norm_diff_dev = _norm2_dev(params - params_old)
        (nd,) = _sync_scalars(norm_diff_dev, backend=backend)
        if nd < tol:  # _norm2_dev returns L2 norm (not squared)
            stopped_early = True
            break

    n_iter = iteration + 1
    if not stopped_early:
        warnings.warn(
            f"newton_solver did not converge within {max_iter} iterations "
            f"(loss={getattr(loss, 'name', '?')}, "
            f"penalty={getattr(penalty, 'name', '?')}).",
            ConvergenceWarning,
            stacklevel=2,
        )
    return params, n_iter
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""Shared utility functions for solvers.
|
|
2
|
+
|
|
3
|
+
Validation helpers, penalty value/gradient/hessian utilities,
|
|
4
|
+
and objective function helpers. All work with generic loss/penalty interfaces.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from statgpu.backends import _resolve_backend, _to_numpy
|
|
10
|
+
from statgpu.backends._utils import _to_float_scalar, _get_xp
|
|
11
|
+
from statgpu.backends._array_ops import (
|
|
12
|
+
_abs_sum,
|
|
13
|
+
_abs_sum_dev,
|
|
14
|
+
_copy_arr,
|
|
15
|
+
_dot,
|
|
16
|
+
_dot_dev,
|
|
17
|
+
_eye_like,
|
|
18
|
+
_norm2,
|
|
19
|
+
_norm2_dev,
|
|
20
|
+
_sum_sq,
|
|
21
|
+
_sum_sq_dev,
|
|
22
|
+
_sync_scalars,
|
|
23
|
+
_zeros,
|
|
24
|
+
_zeros_like,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _validate_uniform_sample_weight(sample_weight, n_samples, solver_name):
|
|
29
|
+
if sample_weight is None:
|
|
30
|
+
return
|
|
31
|
+
_sw = _to_numpy(sample_weight)
|
|
32
|
+
if _sw.ndim != 1 or _sw.shape[0] != n_samples:
|
|
33
|
+
raise ValueError("sample_weight must be a 1D array with length n_samples")
|
|
34
|
+
if not np.all(np.isfinite(_sw)):
|
|
35
|
+
raise ValueError("sample_weight must contain only finite values")
|
|
36
|
+
if np.any(_sw < 0):
|
|
37
|
+
raise ValueError("sample_weight must be non-negative")
|
|
38
|
+
if np.sum(_sw) <= 0.0:
|
|
39
|
+
raise ValueError("sample_weight must contain at least one positive value")
|
|
40
|
+
if not np.allclose(_sw, _sw[0]):
|
|
41
|
+
raise ValueError(
|
|
42
|
+
f"{solver_name} does not support non-uniform sample_weight yet; "
|
|
43
|
+
"use solver='irls' for weighted GLM fits."
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _validate_sample_weight(sample_weight, n_samples):
|
|
48
|
+
if sample_weight is None:
|
|
49
|
+
return
|
|
50
|
+
_sw = _to_numpy(sample_weight)
|
|
51
|
+
if _sw.ndim != 1 or _sw.shape[0] != n_samples:
|
|
52
|
+
raise ValueError("sample_weight must be 1D with length n_samples")
|
|
53
|
+
if not np.all(np.isfinite(_sw)):
|
|
54
|
+
raise ValueError("sample_weight must contain only finite values")
|
|
55
|
+
if np.any(_sw < 0):
|
|
56
|
+
raise ValueError("sample_weight must be non-negative")
|
|
57
|
+
if np.sum(_sw) <= 0:
|
|
58
|
+
raise ValueError("sample_weight must contain at least one positive value")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _as_backend_vector(arr, backend, ref):
    """Convert ``arr`` through ``xp_asarray`` for the given backend.

    The target dtype is taken from ``ref.dtype`` and defaults to
    ``np.float64`` when ``ref`` has no ``dtype`` attribute.  ``ref`` is also
    forwarded as ``ref_arr`` (presumably for device placement -- confirm
    against ``xp_asarray``).
    """
    from statgpu.backends._utils import xp_asarray

    array_module = _get_xp(backend)
    target_dtype = getattr(ref, "dtype", np.float64)
    return xp_asarray(arr, dtype=target_dtype, xp=array_module, ref_arr=ref)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _call_with_weight(fn, *args, sample_weight=None, **kwargs):
|
|
69
|
+
"""Call fn with sample_weight if it accepts it, without otherwise.
|
|
70
|
+
|
|
71
|
+
Avoids the repeated try/except TypeError pattern. Inspects the
|
|
72
|
+
function signature once to decide whether to pass sample_weight.
|
|
73
|
+
"""
|
|
74
|
+
import inspect
|
|
75
|
+
try:
|
|
76
|
+
sig = inspect.signature(fn)
|
|
77
|
+
if 'sample_weight' in sig.parameters:
|
|
78
|
+
return fn(*args, sample_weight=sample_weight, **kwargs)
|
|
79
|
+
except (ValueError, TypeError):
|
|
80
|
+
pass
|
|
81
|
+
return fn(*args, **kwargs)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _nesterov_momentum(t_k, beta_cap=None):
|
|
85
|
+
"""Compute Nesterov momentum parameters.
|
|
86
|
+
|
|
87
|
+
Parameters
|
|
88
|
+
----------
|
|
89
|
+
t_k : float
|
|
90
|
+
Current momentum parameter.
|
|
91
|
+
beta_cap : float, optional
|
|
92
|
+
Maximum allowed momentum (e.g. 0.5 for CV stability).
|
|
93
|
+
|
|
94
|
+
Returns
|
|
95
|
+
-------
|
|
96
|
+
beta : float
|
|
97
|
+
Momentum coefficient.
|
|
98
|
+
t_new : float
|
|
99
|
+
Updated momentum parameter.
|
|
100
|
+
"""
|
|
101
|
+
import math
|
|
102
|
+
t_new = (1.0 + math.sqrt(1.0 + 4.0 * t_k * t_k)) / 2.0
|
|
103
|
+
beta = (t_k - 1.0) / t_new
|
|
104
|
+
if beta_cap is not None:
|
|
105
|
+
beta = min(beta, beta_cap)
|
|
106
|
+
return beta, t_new
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _nesterov_update(coef, coef_old, t_k, beta_cap=None):
    """Return the Nesterov extrapolated point and the advanced momentum parameter.

    Parameters
    ----------
    coef : array
        Current iterate.
    coef_old : array
        Previous iterate.
    t_k : float
        Current momentum parameter.
    beta_cap : float, optional
        Upper bound on the momentum coefficient, forwarded to
        ``_nesterov_momentum``.

    Returns
    -------
    y_k : array
        Extrapolated point: ``coef + beta * (coef - coef_old)``.
    t_new : float
        Updated momentum parameter.
    """
    momentum, t_next = _nesterov_momentum(t_k, beta_cap)
    last_step = coef - coef_old
    extrapolated = coef + momentum * last_step
    return extrapolated, t_next
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _penalty_name(penalty):
|
|
136
|
+
return str(getattr(penalty, "name", "none")).lower()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _smooth_penalty_value(penalty, coef):
|
|
140
|
+
if penalty is None:
|
|
141
|
+
return 0.0
|
|
142
|
+
if hasattr(penalty, "smooth_value"):
|
|
143
|
+
return float(_to_numpy(penalty.smooth_value(coef)))
|
|
144
|
+
if _penalty_name(penalty) in ("none", "null"):
|
|
145
|
+
return 0.0
|
|
146
|
+
if _penalty_name(penalty) == "l2":
|
|
147
|
+
return 0.5 * float(getattr(penalty, "alpha", 0.0)) * _sum_sq(coef)
|
|
148
|
+
if _penalty_name(penalty) == "elasticnet":
|
|
149
|
+
alpha = float(getattr(penalty, "alpha", 0.0))
|
|
150
|
+
l1_ratio = float(getattr(penalty, "l1_ratio", 1.0))
|
|
151
|
+
return 0.5 * alpha * (1.0 - l1_ratio) * _sum_sq(coef)
|
|
152
|
+
raise ValueError(
|
|
153
|
+
f"solver requires a smooth penalty, got penalty='{_penalty_name(penalty)}'."
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _tracking_penalty_value(penalty, coef):
|
|
158
|
+
pen_name = _penalty_name(penalty)
|
|
159
|
+
if penalty is None or pen_name in ("none", "null"):
|
|
160
|
+
return 0.0
|
|
161
|
+
n_features = getattr(penalty, "n_features", None)
|
|
162
|
+
if n_features is not None:
|
|
163
|
+
coef_eval = coef[: int(n_features)]
|
|
164
|
+
backend = _resolve_backend("auto", coef_eval)
|
|
165
|
+
if pen_name == "l1":
|
|
166
|
+
if backend in ("torch", "cupy"):
|
|
167
|
+
abs_sum, = _sync_scalars(_abs_sum_dev(coef_eval), backend=backend)
|
|
168
|
+
else:
|
|
169
|
+
abs_sum = _abs_sum(coef_eval)
|
|
170
|
+
return float(getattr(penalty, "alpha", 0.0)) * abs_sum
|
|
171
|
+
if pen_name in ("elasticnet", "en"):
|
|
172
|
+
alpha = float(getattr(penalty, "alpha", 0.0))
|
|
173
|
+
l1_ratio = float(getattr(penalty, "l1_ratio", 1.0))
|
|
174
|
+
if backend in ("torch", "cupy"):
|
|
175
|
+
abs_sum, sum_sq = _sync_scalars(
|
|
176
|
+
_abs_sum_dev(coef_eval), _sum_sq_dev(coef_eval), backend=backend,
|
|
177
|
+
)
|
|
178
|
+
else:
|
|
179
|
+
abs_sum = _abs_sum(coef_eval)
|
|
180
|
+
sum_sq = _sum_sq(coef_eval)
|
|
181
|
+
return alpha * (l1_ratio * abs_sum + 0.5 * (1.0 - l1_ratio) * sum_sq)
|
|
182
|
+
try:
|
|
183
|
+
return float(penalty.value(coef))
|
|
184
|
+
except (ValueError, TypeError, AttributeError):
|
|
185
|
+
pass
|
|
186
|
+
try:
|
|
187
|
+
return float(penalty.value(_to_numpy(coef)))
|
|
188
|
+
except (ValueError, TypeError, AttributeError):
|
|
189
|
+
pass
|
|
190
|
+
return 0.0
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _abs_mean_max(y, backend):
    """Return ``(mean(|y|), max(|y|))``, synced from the backend in one batch.

    ``backend`` is resolved against *y* first, so whatever
    ``_resolve_backend`` accepts (e.g. ``"auto"``) can be passed.
    """
    resolved = _resolve_backend(backend, y)
    array_module = _get_xp(resolved)
    magnitudes = array_module.abs(y)
    mean_abs, max_abs = _sync_scalars(
        array_module.mean(magnitudes),
        array_module.max(magnitudes),
        backend=resolved,
    )
    return mean_abs, max_abs
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _smooth_penalty_gradient(penalty, coef):
    """Return the gradient of the smooth part of the penalty at *coef*.

    ``None`` and ``none``/``null`` penalties give a zero array shaped like
    *coef*.  A penalty exposing ``smooth_gradient(coef)`` is delegated to.
    Otherwise ``l2`` gives ``alpha * coef`` and ``elasticnet`` (alias ``en``)
    gives ``alpha * (1 - l1_ratio) * coef``.  The ``en`` alias is accepted so
    this helper agrees with ``_smooth_penalty_hessian``.

    Raises
    ------
    ValueError
        If the penalty is not recognised as smooth.
    """
    if penalty is None:
        return _zeros_like(coef)
    name = _penalty_name(penalty)
    if name in ("none", "null"):
        return _zeros_like(coef)
    if hasattr(penalty, "smooth_gradient"):
        return penalty.smooth_gradient(coef)
    if name == "l2":
        return float(getattr(penalty, "alpha", 0.0)) * coef
    if name in ("elasticnet", "en"):
        alpha = float(getattr(penalty, "alpha", 0.0))
        # A missing l1_ratio is treated as pure L1, i.e. no smooth component.
        l1_ratio = float(getattr(penalty, "l1_ratio", 1.0))
        return alpha * (1.0 - l1_ratio) * coef
    raise ValueError(
        f"solver requires a smooth penalty, got penalty='{name}'."
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _smooth_penalty_hessian(penalty, coef):
|
|
218
|
+
if penalty is None or _penalty_name(penalty) in ("none", "null"):
|
|
219
|
+
return 0.0
|
|
220
|
+
n = coef.shape[0]
|
|
221
|
+
if hasattr(penalty, "smooth_hessian"):
|
|
222
|
+
return penalty.smooth_hessian(coef)
|
|
223
|
+
if _penalty_name(penalty) == "l2":
|
|
224
|
+
return float(getattr(penalty, "alpha", 0.0)) * _eye_like(n, coef)
|
|
225
|
+
if _penalty_name(penalty) in ("elasticnet", "en"):
|
|
226
|
+
# ElasticNet Hessian is the L2 component only (L1 is non-smooth)
|
|
227
|
+
alpha = float(getattr(penalty, "alpha", 0.0))
|
|
228
|
+
l1_ratio = float(getattr(penalty, "l1_ratio", 0.5))
|
|
229
|
+
return alpha * (1.0 - l1_ratio) * _eye_like(n, coef)
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"solver requires a smooth penalty, got penalty='{_penalty_name(penalty)}'."
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _objective_value(loss, penalty, X, y, coef):
    """Return ``loss.value(X, y, coef)`` plus the smooth penalty as a Python float."""
    loss_value = float(_to_numpy(loss.value(X, y, coef)))
    penalty_value = _smooth_penalty_value(penalty, coef)
    return loss_value + penalty_value
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _objective_gradient(loss, penalty, X, y, coef):
    """Return the loss gradient plus the smooth-penalty gradient at *coef*."""
    loss_grad = loss.gradient(X, y, coef)
    penalty_grad = _smooth_penalty_gradient(penalty, coef)
    return loss_grad + penalty_grad
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _smooth_penalty_lipschitz(penalty):
|
|
244
|
+
if penalty is None:
|
|
245
|
+
return 0.0
|
|
246
|
+
_pname = _penalty_name(penalty)
|
|
247
|
+
if _pname in ("none", "null", "l1", "scad", "mcp", "adaptive_l1", "adaptive_lasso",
|
|
248
|
+
"group_lasso", "group_mcp", "group_scad", "gl", "gmcp", "gscad"):
|
|
249
|
+
return 0.0
|
|
250
|
+
alpha = float(getattr(penalty, 'alpha', 0.0))
|
|
251
|
+
l1_ratio = float(getattr(penalty, 'l1_ratio', 0.0))
|
|
252
|
+
return alpha * (1.0 - l1_ratio)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _smooth_penalty_value_dev(penalty, coef):
|
|
256
|
+
if penalty is None:
|
|
257
|
+
return 0.0
|
|
258
|
+
pname = _penalty_name(penalty)
|
|
259
|
+
if pname in ("none", "null"):
|
|
260
|
+
return 0.0
|
|
261
|
+
if pname == "l2":
|
|
262
|
+
return 0.5 * float(getattr(penalty, "alpha", 0.0)) * _sum_sq_dev(coef)
|
|
263
|
+
if pname == "elasticnet":
|
|
264
|
+
alpha = float(getattr(penalty, "alpha", 0.0))
|
|
265
|
+
l1_ratio = float(getattr(penalty, "l1_ratio", 1.0))
|
|
266
|
+
return 0.5 * alpha * (1.0 - l1_ratio) * _sum_sq_dev(coef)
|
|
267
|
+
raise ValueError(
|
|
268
|
+
f"smooth_penalty_value_dev requires a smooth penalty, got '{pname}'."
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _objective_value_dev(loss, penalty, X, y, coef):
    """Return loss value plus smooth penalty, leaving the result in the loss's own type.

    When the penalty contributes a plain Python zero, the loss value is
    returned untouched instead of being added to 0.0.
    """
    loss_value = loss.value(X, y, coef)
    penalty_value = _smooth_penalty_value_dev(penalty, coef)
    penalty_is_plain_zero = (
        isinstance(penalty_value, (int, float)) and penalty_value == 0.0
    )
    return loss_value if penalty_is_plain_zero else loss_value + penalty_value
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Survival analysis models.
|
|
3
|
+
|
|
4
|
+
.. rubric:: Naming conventions
|
|
5
|
+
|
|
6
|
+
- ``_cuda`` — CUDA RawKernel (pre-compiled CUDA C kernels).
|
|
7
|
+
- ``_cupy`` — CuPy array operations (GPU via CuPy).
|
|
8
|
+
- ``_triton`` — Triton kernel (GPU via OpenAI Triton).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from ._cox import CoxPH
|
|
12
|
+
from ._cox_cv import CoxPHCV
|
|
13
|
+
|
|
14
|
+
__all__ = ['CoxPH', 'CoxPHCV']
|