statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Link and Family abstractions for GLM.
|
|
3
|
+
|
|
4
|
+
Extracted from the duplicated IRLS loops in _logistic.py across CPU/GPU/Torch backends.
|
|
5
|
+
Each Family defines: link function, variance function, and IRLS weights/working response.
|
|
6
|
+
|
|
7
|
+
All operations are backend-aware: numpy/cupy/torch via _xp dispatch.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from statgpu.backends._array_ops import _clip, _log, _xp
|
|
16
|
+
from statgpu.inference._distributions_backend import get_distribution
|
|
17
|
+
|
|
18
|
+
# Public names of this module. LogitLink (Binomial's default link) and
# ProbitLink are defined in this module, so they are exported together with
# the other Link subclasses.
__all__ = [
    "Link", "LogitLink", "ProbitLink", "IdentityLink", "LogLink",
    "InversePowerLink", "InverseSquaredLink",
    "GLMFamily", "Gaussian", "Binomial", "Poisson", "Gamma",
    "InverseGaussian", "NegativeBinomial", "Tweedie",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _backend_name(arr):
|
|
26
|
+
"""Infer backend name from array type."""
|
|
27
|
+
mod = type(arr).__module__
|
|
28
|
+
if mod.startswith("cupy"):
|
|
29
|
+
return "cupy"
|
|
30
|
+
if mod.startswith("torch"):
|
|
31
|
+
return "torch"
|
|
32
|
+
return "numpy"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Bound on |x| before exponentiation: _exp clamps its input to
# [-_ETA_CLIP_EXP, _ETA_CLIP_EXP] to protect against overflow.
_ETA_CLIP_EXP = 500.0


def _exp(arr):
    """Exponential of ``arr`` after clamping it to +/- ``_ETA_CLIP_EXP``."""
    bounded = _clip(arr, -_ETA_CLIP_EXP, _ETA_CLIP_EXP)
    return _xp(arr).exp(bounded)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _sqrt(arr):
    """Square root of ``arr`` with negative entries clamped to zero first.

    The clamp keeps negative inputs from producing NaN.
    """
    non_negative = _clip(arr, 0, None)
    return _xp(arr).sqrt(non_negative)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _ones_like(arr):
    """Call ``ones_like(arr)`` on the array module that ``_xp(arr)`` returns."""
    xp = _xp(arr)
    return xp.ones_like(arr)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _cdf(arr):
    """Evaluate the standard normal CDF (Phi) at ``arr``.

    The distribution object is looked up for the backend inferred from
    ``arr`` by ``_backend_name``.
    """
    norm = get_distribution("norm", backend=_backend_name(arr))
    return norm.cdf(arr)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _ppf(arr):
    """Evaluate the standard normal PPF (inverse CDF, Phi^{-1}) at ``arr``.

    The distribution object is looked up for the backend inferred from
    ``arr`` by ``_backend_name``.
    """
    norm = get_distribution("norm", backend=_backend_name(arr))
    return norm.ppf(arr)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _pdf(arr):
    """Evaluate the standard normal PDF (phi) at ``arr``.

    The distribution object is looked up for the backend inferred from
    ``arr`` by ``_backend_name``.
    """
    norm = get_distribution("norm", backend=_backend_name(arr))
    return norm.pdf(arr)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ─── Link Functions ────────────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class Link(ABC):
    """Abstract interface of a GLM link function.

    A link connects the mean ``mu`` and the linear predictor ``eta``:

        eta = link(mu)
        mu  = inverse(eta)

    Concrete subclasses must implement ``link``, ``inverse`` and
    ``derivative``.
    """

    # Short identifier of the link; concrete subclasses assign a string.
    name: str

    @abstractmethod
    def link(self, mu):
        """Map the mean to the linear predictor: eta = g(mu)."""

    @abstractmethod
    def inverse(self, eta):
        """Map the linear predictor to the mean: mu = g^{-1}(eta)."""

    @abstractmethod
    def derivative(self, mu):
        """Return g'(mu) = d eta / d mu."""
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class LogitLink(Link):
    """Logit link: eta = log(mu / (1 - mu)), mu = 1 / (1 + exp(-eta)).

    ``link`` and ``derivative`` clamp ``mu`` to [1e-10, 1 - 1e-10] (the same
    bounds ProbitLink and Binomial use) so that mu == 0 or mu == 1 yields a
    large finite value instead of a division by zero / log(0).
    """

    name = "logit"

    def link(self, mu):
        """Return eta = log(mu / (1 - mu)) for the clamped mean."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        return _log(mu_c / (1 - mu_c))

    def inverse(self, eta):
        """Return mu = 1 / (1 + exp(-eta))."""
        # _exp already clamps its argument to [-500, 500], so no extra clip
        # of eta is needed here.
        return 1.0 / (1.0 + _exp(-eta))

    def derivative(self, mu):
        """Return g'(mu) = 1 / (mu * (1 - mu)) for the clamped mean."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        return 1.0 / (mu_c * (1 - mu_c))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ProbitLink(Link):
    """Probit link: eta = Phi^{-1}(mu), with Phi the standard normal CDF."""

    name = "probit"

    def link(self, mu):
        """Return Phi^{-1}(mu) with mu clamped to [1e-10, 1 - 1e-10]."""
        bounded = _clip(mu, 1e-10, 1 - 1e-10)
        return _ppf(bounded)

    def inverse(self, eta):
        """Return mu = Phi(eta)."""
        return _cdf(eta)

    def derivative(self, mu):
        """Return g'(mu) = 1 / phi(Phi^{-1}(mu)) for the clamped mean."""
        quantile = _ppf(_clip(mu, 1e-10, 1 - 1e-10))
        return 1.0 / _pdf(quantile)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class LogLink(Link):
    """Log link: eta = log(mu), mu = exp(eta)."""

    name = "log"

    def link(self, mu):
        """Return log(mu) with mu floored at 1e-10."""
        floored = _clip(mu, 1e-10, None)
        return _log(floored)

    def inverse(self, eta):
        """Return exp(eta) via the overflow-protected ``_exp``."""
        return _exp(eta)

    def derivative(self, mu):
        """Return g'(mu) = 1 / mu (mu is used as given, without clamping)."""
        return 1.0 / mu
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class IdentityLink(Link):
    """Identity link: eta and mu are the same quantity."""

    name = "identity"

    def link(self, mu):
        """Return ``mu`` unchanged."""
        return mu

    def inverse(self, eta):
        """Return ``eta`` unchanged."""
        return eta

    def derivative(self, mu):
        """Return g'(mu) = 1 as an array of ones shaped like ``mu``."""
        ones = _ones_like(mu)
        return ones
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class InversePowerLink(Link):
    """Inverse power link: eta = 1/mu (canonical for Gamma)."""

    name = "inverse_power"
    # Range eta is clamped to before being inverted in ``inverse``.
    _ETA_LO = 1e-4
    _ETA_HI = 1e3

    def link(self, mu):
        """Return 1 / mu with mu floored at 1e-10."""
        floored = _clip(mu, 1e-10, None)
        return 1.0 / floored

    def inverse(self, eta):
        """Return 1 / eta with eta clamped to [_ETA_LO, _ETA_HI]."""
        bounded = _clip(eta, self._ETA_LO, self._ETA_HI)
        return 1.0 / bounded

    def derivative(self, mu):
        """Return g'(mu) = -1 / mu^2 (mu is used as given)."""
        mu_squared = mu * mu
        return -1.0 / mu_squared
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class InverseSquaredLink(Link):
    """Inverse squared link: eta = 1/mu^2 (canonical for InverseGaussian)."""

    name = "inverse_squared"

    def link(self, mu):
        """Return 1 / mu^2 with mu^2 floored at 1e-10."""
        mu_squared = _clip(mu * mu, 1e-10, None)
        return 1.0 / mu_squared

    def inverse(self, eta):
        """Return 1 / sqrt(eta), flooring eta at 1e-20 and the root at 1e-10."""
        positive_eta = _clip(eta, 1e-20, None)
        root = _clip(_sqrt(positive_eta), 1e-10, None)
        return 1.0 / root

    def derivative(self, mu):
        """Return g'(mu) = -2 / mu^3 (mu is used as given)."""
        mu_cubed = mu * mu * mu
        return -2.0 / mu_cubed
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# ─── Families ──────────────────────────────────────────────────────────────
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class GLMFamily(ABC):
    """Base class of a GLM distribution family.

    A family bundles:
    - a link function (eta <-> mu mapping),
    - a variance function, Var(Y) = phi * V(mu),
    - the IRLS working weights and working response.
    """

    # Short identifier of the family; concrete subclasses assign a string.
    name: str
    # Link object used by the generic IRLS formulas below.
    link: Link

    @abstractmethod
    def variance(self, mu):
        """Return the variance function V(mu)."""

    def irls_weights(self, mu, y):
        """Return the IRLS working weights W = 1 / (V(mu) * g'(mu)^2).

        The denominator is floored at 1e-10 before inversion. ``y`` is not
        used by this generic formula. Subclasses may override it with a
        cheaper closed form.
        """
        variance = self.variance(mu)
        slope = self.link.derivative(mu)
        return 1.0 / _clip(variance * slope ** 2, 1e-10, None)

    def irls_working_response(self, mu, y, eta):
        """Return the working response z = eta + (y - mu) * g'(mu)."""
        slope = self.link.derivative(mu)
        return eta + (y - mu) * slope
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class Gaussian(GLMFamily):
    """Gaussian family with identity link (standard linear regression)."""

    name = "gaussian"
    link = IdentityLink()

    def variance(self, mu):
        """Return V(mu) = 1 as an array of ones shaped like ``mu``."""
        unit_variance = _ones_like(mu)
        return unit_variance
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class Binomial(GLMFamily):
    """Binomial family with configurable link (logistic/probit regression).

    The closed-form IRLS quantities W = mu (1 - mu) and
    z = eta + (y - mu) / (mu (1 - mu)) are only valid for the logit link.
    When ``link`` is a ``Link`` instance other than ``LogitLink`` (for
    example ``ProbitLink``), the generic ``GLMFamily`` formulas based on
    ``link.derivative`` are used instead, evaluated at the clamped mean.
    """

    name = "binomial"

    def __init__(self, link=None):
        """Store ``link``; ``None`` selects a fresh ``LogitLink``."""
        self.link = link if link is not None else LogitLink()

    def _uses_generic_irls(self):
        """Return True when the logit closed forms must not be used.

        Only a ``Link`` instance that is not a ``LogitLink`` switches to the
        generic path; any other value keeps the logit fast path, which never
        touches ``self.link``.
        """
        return isinstance(self.link, Link) and not isinstance(self.link, LogitLink)

    def variance(self, mu):
        """Return V(mu) = mu * (1 - mu) (mu is used as given)."""
        return mu * (1 - mu)

    def irls_weights(self, mu, y):
        """Return the IRLS working weights for mu clamped to [1e-10, 1 - 1e-10]."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        if self._uses_generic_irls():
            # W = 1 / (V(mu) * g'(mu)^2) with the configured link.
            return super().irls_weights(mu_c, y)
        return mu_c * (1 - mu_c)

    def irls_working_response(self, mu, y, eta):
        """Return the IRLS working response for mu clamped to [1e-10, 1 - 1e-10]."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        if self._uses_generic_irls():
            # z = eta + (y - mu) * g'(mu) with the configured link.
            return super().irls_working_response(mu_c, y, eta)
        var = mu_c * (1 - mu_c)
        return eta + (y - mu_c) / var
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class Poisson(GLMFamily):
    """Poisson family with log link (Poisson regression)."""

    name = "poisson"
    link = LogLink()

    def variance(self, mu):
        """Return V(mu) = mu."""
        return mu

    def irls_weights(self, mu, y):
        """Return the working weights: mu floored at 1e-10."""
        weights = _clip(mu, 1e-10, None)
        return weights

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu, flooring only the divisor at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        return eta + (y - mu) / safe_mu
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class Gamma(GLMFamily):
    """Gamma family (positive continuous outcomes).

    The default link is log, chosen for numerical stability; the canonical
    link is inverse_power. IRLS weights and working response come from the
    generic ``GLMFamily`` implementations.
    """

    name = "gamma"

    def __init__(self, link=None):
        """Store ``link``; ``None`` selects a fresh ``LogLink``."""
        if link is None:
            link = LogLink()
        self.link = link

    def variance(self, mu):
        """Return V(mu) = mu^2."""
        return mu * mu
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class InverseGaussian(GLMFamily):
    """Inverse Gaussian family (positive continuous, right-skewed).

    Uses the log link for numerical stability.
    """

    name = "inverse_gaussian"
    link = LogLink()

    def variance(self, mu):
        """Return V(mu) = mu^3."""
        return mu * mu * mu

    def irls_weights(self, mu, y):
        """Return the working weights 1 / mu with mu floored at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        return _ones_like(mu) / safe_mu

    def irls_working_response(self, mu, y, eta):
        """Return the working response for the log link.

        z = eta + (y - mu) * g'(mu) = eta + (y - mu) / mu, evaluated with mu
        floored at 1e-10.
        """
        safe_mu = _clip(mu, 1e-10, None)
        relative_residual = (y - safe_mu) / safe_mu
        return eta + relative_residual
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class NegativeBinomial(GLMFamily):
    """Negative Binomial family (overdispersed count data).

    Uses the log link. The dispersion parameter ``alpha`` controls the
    overdispersion: Var(Y) = mu + alpha * mu^2. As alpha -> 0 the family
    approaches Poisson.
    """

    name = "negative_binomial"
    link = LogLink()

    def __init__(self, alpha=1.0):
        """Validate and store ``alpha``.

        Raises
        ------
        ValueError
            If ``alpha`` is not finite or is not strictly positive.
        """
        alpha_is_valid = np.isfinite(alpha) and alpha > 0.0
        if not alpha_is_valid:
            raise ValueError("alpha must be a finite positive scalar for negative binomial family")
        self.alpha = alpha

    def variance(self, mu):
        """Return V(mu) = mu + alpha * mu^2."""
        return mu + self.alpha * mu * mu

    def irls_weights(self, mu, y):
        """Return the working weights mu / (1 + alpha * mu), mu floored at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        denominator = 1.0 + self.alpha * safe_mu
        return safe_mu / denominator

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu with mu floored at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        relative_residual = (y - safe_mu) / safe_mu
        return eta + relative_residual
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class Tweedie(GLMFamily):
    """Tweedie family (power variance function).

    Variance function: V(mu) = mu^power.
    - power=0: Gaussian
    - power=1: Poisson
    - power=2: Gamma
    - 1 < power < 2: compound Poisson-Gamma (most common usage)

    Uses the log link.
    """

    name = "tweedie"
    link = LogLink()

    def __init__(self, power=1.5):
        """Store the variance power (stored as given, without validation)."""
        self.power = power

    def variance(self, mu):
        """Return V(mu) = mu^power with mu floored at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        return safe_mu ** self.power

    def irls_weights(self, mu, y):
        """Return the working weights mu^(2 - power), mu floored at 1e-10.

        With the log link, w = 1 / (V(mu) * g'(mu)^2)
        = 1 / (mu^p * (1/mu)^2) = mu^(2 - p).
        """
        safe_mu = _clip(mu, 1e-10, None)
        exponent = 2.0 - self.power
        return safe_mu ** exponent

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu with mu floored at 1e-10."""
        safe_mu = _clip(mu, 1e-10, None)
        relative_residual = (y - safe_mu) / safe_mu
        return eta + relative_residual
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""GLM-specific fused loss+gradient functions.
|
|
2
|
+
|
|
3
|
+
These avoid redundant X @ coef computation by computing value and gradient
|
|
4
|
+
in a single pass. They are called by GLMLoss subclasses' fused_value_and_gradient()
|
|
5
|
+
methods for performance.
|
|
6
|
+
|
|
7
|
+
NOT part of the generic solver interface — these are GLM internal optimizations.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from statgpu.backends._utils import _to_float_scalar, _get_xp
|
|
11
|
+
from statgpu.backends import _to_numpy
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _fused_logistic(eta, X, y, n, loss):
    """Fused logistic loss and gradient from a precomputed ``eta``.

    Returns ``(val, grad)`` with
    val  = sum(-y * eta + softplus(eta)) / n and
    grad = X.T @ (sigmoid(eta) - y) / n.
    ``loss`` is accepted for a uniform kernel signature and is not used.
    """
    from statgpu.backends._array_ops import _sigmoid, _softplus, _sum

    prob = _sigmoid(eta)
    softplus_eta = _softplus(eta)
    per_sample = -y * eta + softplus_eta
    residual = prob - y
    return _sum(per_sample) / n, X.T @ residual / n
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _fused_poisson(eta, X, y, n, loss):
    """Fused Poisson (log link) loss and gradient from a precomputed ``eta``.

    mu = exp(eta) with eta clamped to [-30, 30]. Returns ``(val, grad)`` with
    val  = sum(mu - y * log(max(mu, 1e-10))) / n and
    grad = X.T @ (mu - y) / n.
    ``loss`` is accepted for a uniform kernel signature and is not used.
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum

    mu = _exp(_clip(eta, -30, 30))
    # Only the logarithm uses the floored mean; the other terms use mu itself.
    log_mu = _log(_clip(mu, 1e-10, None))
    per_sample = mu - y * log_mu
    return _sum(per_sample) / n, X.T @ (mu - y) / n
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _fused_gamma(eta, X, y, n, loss):
    """Fused Gamma loss and gradient from a precomputed ``eta``.

    The link is read from ``loss.link_name`` (falling back to ``loss.link``,
    then to "log").

    - inverse_power: eta is clamped to [loss._ETA_LO, loss._ETA_HI],
      mu = 1 / eta, val = sum(y * eta - log(eta)) / n,
      grad = X.T @ (y - mu) / n.
    - otherwise (log): mu = exp(eta) with eta clamped to [-30, 30] and mu
      floored at 1e-10, val = sum(y / mu + log(mu)) / n,
      grad = X.T @ ((mu - y) / mu) / n.

    Returns ``(val, grad)``.
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum

    gamma_link = getattr(loss, "link_name", getattr(loss, "link", "log"))
    if gamma_link == "inverse_power":
        # Fallback bounds match GammaLoss._ETA_LO / _ETA_HI (1e-4, 1e3) so a
        # loss object without these attributes is clamped the same way.
        eta_lo = float(getattr(loss, "_ETA_LO", 1e-4))
        eta_hi = float(getattr(loss, "_ETA_HI", 1e3))
        eta_c = _clip(eta, eta_lo, eta_hi)
        mu = 1.0 / eta_c
        val = _sum(y * eta_c - _log(eta_c)) / n
        grad = X.T @ (y - mu) / n
        return val, grad

    # NOTE(review): GammaLoss._mu_from_eta clamps mu to [_MU_LO, _MU_HI]
    # for the log link, while this path only floors mu at 1e-10 - confirm
    # whether the difference is intended.
    mu = _exp(_clip(eta, -30, 30))
    mu_c = _clip(mu, 1e-10, None)
    val = _sum(y / mu_c + _log(mu_c)) / n
    grad = X.T @ ((mu_c - y) / mu_c) / n
    return val, grad
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _fused_negative_binomial(eta, X, y, n, loss):
    """Fused negative binomial (log link) loss and gradient.

    ``alpha`` is read from ``loss.alpha`` (default 1.0). mu = exp(eta) with
    eta clamped to [-30, 30] and mu floored at 1e-300. Returns
    ``(val, grad)`` with
    val  = sum(-y * log(mu / (1 + a mu)) + (1 / a) * log(1 + a mu)) / n and
    grad = X.T @ ((mu - y) / (1 + a mu)) / n.
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum

    alpha = float(getattr(loss, "alpha", 1.0))
    mean = _clip(_exp(_clip(eta, -30, 30)), 1e-300, None)
    scale = 1.0 + alpha * mean
    per_sample = -y * _log(mean / scale) + (1.0 / alpha) * _log(scale)
    objective = _sum(per_sample) / n
    gradient = X.T @ ((mean - y) / scale) / n
    return objective, gradient
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _fused_tweedie(eta, X, y, n, loss):
    """Fused Tweedie (log link) loss and gradient from a precomputed ``eta``.

    The variance power p is read from ``loss.power`` (default 1.5).
    mu = exp(eta) with eta clamped to [-50, 50] and mu clamped to
    [1e-10, 1e6]. The value is sum(term1 + term2) / n with
    term1 = -y * mu^(1-p) / (1-p)  (or -y * log(mu) when |1-p| < 0.01) and
    term2 = mu^(2-p) / (2-p)       (or log(mu) when |2-p| < 0.01).
    The gradient is X.T @ (mu^(1-p) * (mu - y)) / n. Returns ``(val, grad)``.
    """
    from statgpu.backends._array_ops import _exp, _clip, _sum, _log

    power = float(getattr(loss, "power", 1.5))
    mean = _clip(_exp(_clip(eta, -50, 50)), 1e-10, 1e6)
    log_mean = _log(mean)
    d1, d2 = 1.0 - power, 2.0 - power

    # Near p = 1 and p = 2 the power terms are replaced by their log forms.
    term1 = -y * log_mean if abs(d1) < 0.01 else -y * mean**d1 / d1
    term2 = log_mean if abs(d2) < 0.01 else mean**d2 / d2

    objective = _sum(term1 + term2) / n
    gradient = X.T @ (mean**d1 * (mean - y)) / n
    return objective, gradient
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _fused_inverse_gaussian(eta, X, y, n, loss):
    """Fused inverse Gaussian (log link) loss and gradient.

    mu = exp(eta) with eta clamped to [-30, 30] and mu clamped to
    [5e-2, 1e3]. Returns ``(val, grad)`` with
    val  = sum(y / (2 mu^2) - 1 / mu) / n and
    grad = X.T @ ((mu - y) / mu^2) / n.
    ``loss`` is accepted for a uniform kernel signature and is not used.
    """
    from statgpu.backends._array_ops import _exp, _clip, _sum

    mean = _clip(_exp(_clip(eta, -30, 30)), 5e-2, 1e3)
    per_sample = y / (2.0 * mean * mean) - 1.0 / mean
    score = (mean - y) / (mean * mean)
    return _sum(per_sample) / n, X.T @ score / n
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Module-level dispatch table (avoids rebuilding dict on every call).
# Keys are loss ``name`` values; every kernel has the signature
# ``kernel(eta, X, y, n, loss) -> (val, grad)``.
_FUSED_DISPATCH = {
    "logistic": _fused_logistic,
    "poisson": _fused_poisson,
    "gamma": _fused_gamma,
    "negative_binomial": _fused_negative_binomial,
    "tweedie": _fused_tweedie,
    "inverse_gaussian": _fused_inverse_gaussian,
}


def _fused_glm_value_and_gradient(loss, X, y, coef):
    """Dispatch to fused kernel based on loss name (GLM-specific).

    When ``loss.name`` has an entry in ``_FUSED_DISPATCH``, ``X @ coef`` is
    computed once and handed to that kernel. Otherwise the result is
    ``(loss.value(X, y, coef), loss.gradient(X, y, coef))``; in that case the
    linear predictor is not computed here, since it would be discarded.

    Returns
    -------
    tuple
        ``(val, grad)``.
    """
    kernel = _FUSED_DISPATCH.get(getattr(loss, "name", ""))
    if kernel is None:
        return loss.value(X, y, coef), loss.gradient(X, y, coef)
    return kernel(X @ coef, X, y, X.shape[0], loss)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _weighted_loss_and_grad(loss, X, y, coef, sample_weight):
    """Weighted loss+gradient (GLM-specific fast paths).

    Resolution order:

    1. ``loss.name == "squared_error"``: closed form
       val = 0.5 * sum(w * r^2) / sum(w), grad = X.T @ (w * r) / sum(w),
       with r = X @ coef - y.
    2. ``loss.fused_value_and_gradient(X, y, coef, sample_weight=...)`` when
       the loss provides it.
    3. ``loss.value`` / ``loss.gradient`` with ``sample_weight``.
    4. ``loss.value`` / ``loss.gradient`` without ``sample_weight`` (the
       weights are ignored in this last fallback).

    Steps 2 and 3 are abandoned when they raise ``TypeError``.

    Returns
    -------
    tuple
        ``(val, grad)``.
    """
    loss_name = getattr(loss, "name", "")
    if loss_name == "squared_error":
        # The weight array is only needed on this path, so it is built here
        # rather than up front (avoids a host round-trip for other losses).
        from statgpu.backends import _resolve_backend

        xp = _get_xp(_resolve_backend("auto", X))
        sw_host = _to_numpy(sample_weight)
        # Only torch gets an explicit ``device``: CuPy and NumPy 2 arrays
        # also expose a ``.device`` attribute, so its presence alone does not
        # mean the backend's ``asarray`` accepts a ``device`` keyword.
        if getattr(xp, "__name__", "") == "torch":
            sw = xp.asarray(sw_host, dtype=X.dtype, device=X.device)
        else:
            sw = xp.asarray(sw_host, dtype=X.dtype)
        sw_sum = _to_float_scalar(xp.sum(sw))

        resid = X @ coef - y
        grad = X.T @ (sw * resid) / sw_sum
        val = 0.5 * _to_float_scalar(xp.sum(sw * resid * resid)) / sw_sum
        return val, grad

    if hasattr(loss, "fused_value_and_gradient"):
        try:
            return loss.fused_value_and_gradient(
                X, y, coef, sample_weight=sample_weight
            )
        except TypeError:
            # Best-effort: treated as "sample_weight keyword not supported".
            # NOTE(review): this also hides TypeErrors raised inside the
            # method body.
            pass

    try:
        val = loss.value(X, y, coef, sample_weight=sample_weight)
        grad = loss.gradient(X, y, coef, sample_weight=sample_weight)
        return val, grad
    except TypeError:
        # Last resort: unweighted value and gradient.
        val = loss.value(X, y, coef)
        grad = loss.gradient(X, y, coef)
        return val, grad
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gamma loss: negative Gamma log-likelihood.
|
|
3
|
+
|
|
4
|
+
For positive continuous outcomes:
|
|
5
|
+
loss = (1/n) * sum(y/mu + log(mu))
|
|
6
|
+
where mu is determined by the configured link:
|
|
7
|
+
- log: mu = exp(X @ coef)
|
|
8
|
+
- inverse_power: mu = 1 / (X @ coef)
|
|
9
|
+
|
|
10
|
+
Supports numpy / cupy / torch backends via _array_ops helpers.
|
|
11
|
+
"""
|
|
12
|
+
from statgpu.backends._array_ops import _clip, _exp, _log, _sum, _max_eigval_power, _xp
|
|
13
|
+
from statgpu.glm_core._base import GLMLoss, register_glm_loss
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@register_glm_loss('gamma')
class GammaLoss(GLMLoss):
    """Gamma loss with a ``log`` or ``inverse_power`` link.

    Per-sample value is y / mu + log(mu), where mu = exp(eta) for the log
    link and mu = 1 / eta for the inverse_power link. Both eta and mu are
    clamped to the class-level ranges below before use.
    """

    name = "gamma"
    y_type = "positive"
    smooth_gradient = True
    has_hessian = True
    _lipschitz_uses_y = True
    _lipschitz_safety = 3.0  # Gamma Hessian varies with mu
    _conservative_momentum_with_nonsmooth = True
    _gamma_like = True

    # Clamp range for mu (log link) and for eta (inverse_power link).
    _MU_LO = 1e-3
    _MU_HI = 1e4
    _ETA_LO = 1e-4
    _ETA_HI = 1e3

    def __init__(self, link="log"):
        """Validate and store the link name.

        Raises
        ------
        ValueError
            If ``link`` is neither "log" nor "inverse_power".
        """
        allowed_links = ("log", "inverse_power")
        if link not in allowed_links:
            raise ValueError(
                "GammaLoss link must be 'log' or 'inverse_power', "
                f"got {link!r}."
            )
        self.link = link
        self.link_name = link
        self._lipschitz_at_init = link == "inverse_power"
        self._has_constant_hessian = (link == "log")

    def _eta_mu(self, X, coef):
        """Return the clamped linear predictor of ``X @ coef`` and its mean."""
        linear = X @ coef
        if self.link == "inverse_power":
            bounded = _clip(linear, self._ETA_LO, self._ETA_HI)
            return bounded, 1.0 / bounded
        bounded = _clip(linear, -30, 30)
        return bounded, _clip(_exp(bounded), self._MU_LO, self._MU_HI)

    def _mu_from_eta(self, eta):
        """Return the clamped mean for a given linear predictor."""
        if self.link == "inverse_power":
            return 1.0 / _clip(eta, self._ETA_LO, self._ETA_HI)
        raw_mean = _exp(_clip(eta, -30, 30))
        return _clip(raw_mean, self._MU_LO, self._MU_HI)

    # -- Per-sample formulas (single source of truth) --------------------

    def per_sample_value(self, eta, y):
        """Return the per-sample loss value for linear predictor ``eta``."""
        if self.link == "inverse_power":
            # y / mu + log(mu) written in terms of eta = 1 / mu.
            bounded = _clip(eta, self._ETA_LO, self._ETA_HI)
            return y * bounded - _log(bounded)
        mean = self._mu_from_eta(eta)
        return y / mean + _log(mean)

    def per_sample_gradient(self, eta, y):
        """Return the per-sample derivative of the loss with respect to eta."""
        mean = self._mu_from_eta(eta)
        if self.link == "inverse_power":
            return y - mean
        return 1.0 - y / mean

    def hessian(self, X, y, coef, sample_weight=None):
        """Return X.T @ diag(W) @ X / n_eff.

        W is 1 / eta^2 for inverse_power and all ones for the log link
        (expected Fisher information); it is multiplied by ``sample_weight``
        when given. n_eff is the weight sum, or the row count without
        weights.
        """
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            n_eff = float(sample_weight.sum())

        if self.link == "inverse_power":
            eta_c, _ = self._eta_mu(X, coef)
            weights = 1.0 / (eta_c * eta_c)
        else:
            # Expected Fisher: W(mu) = 1 for Gamma with log link
            xp = _xp(X)
            ones_kwargs = {"dtype": X.dtype}
            if xp.__name__ == "torch":
                ones_kwargs["device"] = X.device
            weights = xp.ones(X.shape[0], **ones_kwargs)

        if sample_weight is not None:
            weights = weights * sample_weight
        return X.T @ (X * weights[:, None]) / n_eff

    def lipschitz(self, X, coef, y=None, sample_weight=None):
        """Return max(largest eigenvalue of X.T W X / n_eff, 1e-8).

        W is 1 / eta^2 for inverse_power and y / mu for the log link when
        ``y`` is given. For the log link without ``y`` the unweighted
        X.T @ X is used (``sample_weight`` then only enters through n_eff).
        """
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            n_eff = float(sample_weight.sum())

        if self.link == "inverse_power":
            eta_c, _ = self._eta_mu(X, coef)
            weights = 1.0 / (eta_c * eta_c)
        elif y is None:
            gram = X.T @ X
            return max(_max_eigval_power(gram) / n_eff, 1e-8)
        else:
            z = _clip(X @ coef, -30, 30)
            mean = _clip(_exp(z), self._MU_LO, self._MU_HI)
            weights = y / mean

        if sample_weight is not None:
            weights = weights * sample_weight
        weighted_gram = X.T @ (X * weights[:, None])
        bound = _max_eigval_power(weighted_gram) / n_eff
        return max(bound, 1e-8)

    def predict(self, X, coef):
        """Return the predicted mean for ``X``.

        inverse_power: 1 / eta with eta clamped to [_ETA_LO, _ETA_HI];
        log: exp(X @ coef) without the mu clamp.
        """
        if self.link != "inverse_power":
            return _exp(X @ coef)
        bounded = _clip(X @ coef, self._ETA_LO, self._ETA_HI)
        return 1.0 / bounded
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Inverse Gaussian loss: negative log-likelihood with log link.
|
|
3
|
+
|
|
4
|
+
For positive right-skewed outcomes:
|
|
5
|
+
loss = (1/n) * sum(y/(2*mu^2) - 1/mu)
|
|
6
|
+
where mu = exp(X @ coef).
|
|
7
|
+
|
|
8
|
+
Supports numpy / cupy / torch backends via _array_ops helpers.
|
|
9
|
+
"""
|
|
10
|
+
from statgpu.backends._array_ops import _clip, _exp, _sum, _max_eigval_power
|
|
11
|
+
from statgpu.glm_core._base import GLMLoss, register_glm_loss
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@register_glm_loss('inverse_gaussian')
class InverseGaussianLoss(GLMLoss):
    """Inverse Gaussian loss with log link.

    Per-sample value is y / (2 mu^2) - 1 / mu with mu = exp(eta); eta is
    clamped to [-30, 30] and mu to [_MU_LO, _MU_HI] before use.
    """

    name = "inverse_gaussian"
    y_type = "positive"
    smooth_gradient = True
    has_hessian = True
    _lipschitz_uses_y = True
    _lipschitz_safety = 3.0  # 1/mu^3 gradient scaling requires safety factor
    _skip_momentum = True
    _inverse_gaussian = True  # 1/mu^3 scaling causes Nesterov oscillation

    # Clamp range applied to the mean.
    _MU_LO = 5e-2
    _MU_HI = 1e3

    def _mu_from_eta(self, eta):
        """Return the clamped mean exp(eta) for a given linear predictor."""
        raw_mean = _exp(_clip(eta, -30, 30))
        return _clip(raw_mean, self._MU_LO, self._MU_HI)

    # -- Per-sample formulas (single source of truth) --------------------

    def per_sample_value(self, eta, y):
        """Return the per-sample loss y / (2 mu^2) - 1 / mu."""
        mean = self._mu_from_eta(eta)
        return y / (2.0 * mean * mean) - 1.0 / mean

    def per_sample_gradient(self, eta, y):
        """Return the per-sample derivative with respect to eta: (mu - y) / mu^2."""
        mean = self._mu_from_eta(eta)
        return (mean - y) / (mean * mean)

    def hessian(self, X, y, coef, sample_weight=None):
        """Return X.T @ diag(W) @ X / n_eff with W = 1 / mu.

        W is multiplied by ``sample_weight`` when given; n_eff is the weight
        sum, or the row count without weights. ``y`` is not used.
        """
        clipped_eta = _clip(X @ coef, -30, 30)
        mean = _clip(_exp(clipped_eta), self._MU_LO, self._MU_HI)
        weights = 1.0 / mean
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            weights = weights * sample_weight
            n_eff = float(sample_weight.sum())
        return X.T @ (X * weights[:, None]) / n_eff

    def lipschitz(self, X, coef, y=None, sample_weight=None):
        """Return max(largest eigenvalue of X.T W X / n_eff, 1e-8), W = 1 / mu.

        ``y`` is accepted but not used by this implementation.
        """
        clipped_eta = _clip(X @ coef, -30, 30)
        mean = _clip(_exp(clipped_eta), self._MU_LO, self._MU_HI)
        weights = 1.0 / mean
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            weights = weights * sample_weight
            n_eff = float(sample_weight.sum())
        weighted_gram = X.T @ (X * weights[:, None])
        bound = _max_eigval_power(weighted_gram) / n_eff
        return max(bound, 1e-8)

    def predict(self, X, coef):
        """Return the predicted mean exp(X @ coef), without the mu clamp."""
        linear = X @ coef
        return _exp(linear)
|