statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,362 @@
1
+ """
2
+ Link and Family abstractions for GLM.
3
+
4
+ Extracted from the duplicated IRLS loops in _logistic.py across CPU/GPU/Torch backends.
5
+ Each Family defines: link function, variance function, and IRLS weights/working response.
6
+
7
+ All operations are backend-aware: numpy/cupy/torch via _xp dispatch.
8
+ """
9
+
10
+ from abc import ABC, abstractmethod
11
+ from typing import Any
12
+
13
+ import numpy as np
14
+
15
+ from statgpu.backends._array_ops import _clip, _log, _xp
16
+ from statgpu.inference._distributions_backend import get_distribution
17
+
18
# Public API of this module.  LogitLink and ProbitLink are listed alongside
# the other concrete links: they are defined here and Binomial uses LogitLink
# as its default, so they belong in the exported set.
__all__ = [
    "Link", "LogitLink", "ProbitLink", "LogLink", "IdentityLink",
    "InversePowerLink", "InverseSquaredLink",
    "GLMFamily", "Gaussian", "Binomial", "Poisson", "Gamma",
    "InverseGaussian", "NegativeBinomial", "Tweedie",
]
23
+
24
+
25
def _backend_name(arr):
    """Return ``"cupy"``, ``"torch"`` or ``"numpy"`` based on ``arr``'s type.

    The decision uses the module path of ``type(arr)``; anything that is not
    defined under a ``cupy`` or ``torch`` module is reported as ``"numpy"``.
    """
    module_path = type(arr).__module__
    for candidate in ("cupy", "torch"):
        if module_path.startswith(candidate):
            return candidate
    return "numpy"
33
+
34
+
35
# Symmetric bound applied to the argument of exp() in _exp below.
# NOTE(review): exp(500) is only representable in float64; confirm whether
# float32 inputs are expected to reach this helper.
_ETA_CLIP_EXP = 500.0


def _exp(arr):
    """Exponential of ``arr`` with the argument clamped to +/- ``_ETA_CLIP_EXP``."""
    xp = _xp(arr)
    bounded = _clip(arr, -_ETA_CLIP_EXP, _ETA_CLIP_EXP)
    return xp.exp(bounded)
42
+
43
+
44
def _sqrt(arr):
    """Square root of ``arr`` after clamping negative entries to zero (no NaN)."""
    xp = _xp(arr)
    non_negative = _clip(arr, 0, None)
    return xp.sqrt(non_negative)
47
+
48
+
49
def _ones_like(arr):
    """Return ``ones_like(arr)`` from the array module selected by ``_xp(arr)``."""
    xp = _xp(arr)
    return xp.ones_like(arr)
51
+
52
+
53
def _cdf(arr):
    """Evaluate the standard normal CDF (Phi) on ``arr`` using its backend."""
    normal = get_distribution("norm", backend=_backend_name(arr))
    return normal.cdf(arr)
57
+
58
+
59
def _ppf(arr):
    """Evaluate the standard normal PPF (inverse CDF, Phi^{-1}) on ``arr``."""
    normal = get_distribution("norm", backend=_backend_name(arr))
    return normal.ppf(arr)
63
+
64
+
65
def _pdf(arr):
    """Evaluate the standard normal PDF (phi) on ``arr`` using its backend."""
    normal = get_distribution("norm", backend=_backend_name(arr))
    return normal.pdf(arr)
69
+
70
+
71
+ # ─── Link Functions ────────────────────────────────────────────────────────
72
+
73
+
74
class Link(ABC):
    """Abstract link function relating the mean to the linear predictor.

    A concrete link provides three maps:
        eta = link(mu)        forward map g
        mu  = inverse(eta)    inverse map g^{-1}
        g'(mu) = derivative(mu)
    """

    # Short identifier of the link; concrete subclasses assign it.
    name: str

    @abstractmethod
    def link(self, mu):
        """Return eta = g(mu)."""

    @abstractmethod
    def inverse(self, eta):
        """Return mu = g^{-1}(eta)."""

    @abstractmethod
    def derivative(self, mu):
        """Return g'(mu) = d eta / d mu."""
98
+
99
+
100
class LogitLink(Link):
    """Logit link: eta = log(mu / (1 - mu)), mu = 1 / (1 + exp(-eta)).

    ``link`` and ``derivative`` clamp ``mu`` to ``[1e-10, 1 - 1e-10]`` before
    use, mirroring ProbitLink, so that mu equal to 0 or 1 produces a large
    finite value instead of +/-inf or a division by zero.
    """

    name = "logit"

    # Distance kept between mu and the endpoints 0 and 1.
    _MU_EPS = 1e-10

    def link(self, mu):
        """Return the log-odds of the clamped mean."""
        mu_c = _clip(mu, self._MU_EPS, 1 - self._MU_EPS)
        return _log(mu_c / (1 - mu_c))

    def inverse(self, eta):
        """Return the logistic sigmoid of ``eta`` (eta clamped to +/-500)."""
        return 1.0 / (1.0 + _exp(-_clip(eta, -500, 500)))

    def derivative(self, mu):
        """Return g'(mu) = 1 / (mu * (1 - mu)) evaluated at the clamped mean."""
        mu_c = _clip(mu, self._MU_EPS, 1 - self._MU_EPS)
        return 1.0 / (mu_c * (1 - mu_c))
111
+
112
+
113
class ProbitLink(Link):
    """Probit link: eta = Phi^{-1}(mu), with Phi the standard normal CDF."""

    name = "probit"

    def link(self, mu):
        """Return the normal quantile of ``mu`` clamped to [1e-10, 1 - 1e-10]."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        return _ppf(mu_c)

    def inverse(self, eta):
        """Return mu = Phi(eta)."""
        return _cdf(eta)

    def derivative(self, mu):
        """Return g'(mu) = 1 / phi(Phi^{-1}(mu)) at the clamped mean."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        quantile = _ppf(mu_c)
        return 1.0 / _pdf(quantile)
128
+
129
+
130
class LogLink(Link):
    """Log link: eta = log(mu), mu = exp(eta)."""

    name = "log"

    def link(self, mu):
        """Return log(mu) with ``mu`` floored at 1e-10."""
        floored = _clip(mu, 1e-10, None)
        return _log(floored)

    def inverse(self, eta):
        """Return exp(eta) through the overflow-protected ``_exp`` helper."""
        return _exp(eta)

    def derivative(self, mu):
        """Return g'(mu) = 1 / mu (``mu`` is used as given, without a floor)."""
        return 1.0 / mu
141
+
142
+
143
class IdentityLink(Link):
    """Identity link: eta and mu coincide."""

    name = "identity"

    def link(self, mu):
        """Return ``mu`` unchanged."""
        return mu

    def inverse(self, eta):
        """Return ``eta`` unchanged."""
        return eta

    def derivative(self, mu):
        """Return g'(mu) = 1 as an array of ones shaped like ``mu``."""
        ones = _ones_like(mu)
        return ones
154
+
155
+
156
class InversePowerLink(Link):
    """Inverse power link: eta = 1/mu (canonical for Gamma)."""

    name = "inverse_power"
    # Range to which eta is clamped before it is inverted.
    _ETA_LO = 1e-4
    _ETA_HI = 1e3

    def link(self, mu):
        """Return 1 / mu with ``mu`` floored at 1e-10."""
        floored = _clip(mu, 1e-10, None)
        return 1.0 / floored

    def inverse(self, eta):
        """Return 1 / eta with ``eta`` clamped to [_ETA_LO, _ETA_HI]."""
        bounded = _clip(eta, self._ETA_LO, self._ETA_HI)
        return 1.0 / bounded

    def derivative(self, mu):
        """Return g'(mu) = -1 / mu^2 (``mu`` is used as given)."""
        mu_sq = mu * mu
        return -1.0 / mu_sq
171
+
172
+
173
class InverseSquaredLink(Link):
    """Inverse squared link: eta = 1/mu^2 (canonical for InverseGaussian)."""

    name = "inverse_squared"

    def link(self, mu):
        """Return 1 / mu^2 with mu^2 floored at 1e-10."""
        mu_sq = _clip(mu * mu, 1e-10, None)
        return 1.0 / mu_sq

    def inverse(self, eta):
        """Return 1 / sqrt(eta); eta is floored at 1e-20 and its root at 1e-10."""
        root = _sqrt(_clip(eta, 1e-20, None))
        return 1.0 / _clip(root, 1e-10, None)

    def derivative(self, mu):
        """Return g'(mu) = -2 / mu^3 (``mu`` is used as given)."""
        mu_cubed = mu * mu * mu
        return -2.0 / mu_cubed
187
+
188
+
189
+ # ─── Families ──────────────────────────────────────────────────────────────
190
+
191
+
192
class GLMFamily(ABC):
    """Base class for GLM distribution families.

    A family bundles:
        - a link object mapping between eta and mu
        - a variance function V(mu), with Var(Y) = phi * V(mu)
        - the IRLS working weights and working response
    """

    # Family identifier and link object; set by subclasses (class attribute
    # or in ``__init__``).
    name: str
    link: Link

    @abstractmethod
    def variance(self, mu):
        """Return the variance function V(mu)."""

    def irls_weights(self, mu, y):
        """Return the generic IRLS working weights.

            W = 1 / (V(mu) * g'(mu)^2)

        The denominator is floored at 1e-10 before inversion.  ``y`` is not
        used by this default; subclasses may override with a closed form.
        """
        v_mu = self.variance(mu)
        g_prime = self.link.derivative(mu)
        denom = v_mu * g_prime ** 2
        return 1.0 / _clip(denom, 1e-10, None)

    def irls_working_response(self, mu, y, eta):
        """Return the working response z = eta + (y - mu) * g'(mu)."""
        resid = y - mu
        return eta + resid * self.link.derivative(mu)
223
+
224
+
225
class Gaussian(GLMFamily):
    """Gaussian family with identity link (ordinary linear regression)."""

    name = "gaussian"
    link = IdentityLink()

    def variance(self, mu):
        """Return V(mu) = 1 as an array of ones shaped like ``mu``."""
        unit_variance = _ones_like(mu)
        return unit_variance
233
+
234
+
235
class Binomial(GLMFamily):
    """Binomial family with configurable link (logistic/probit regression).

    For the logit link the IRLS quantities have the closed forms
    W = mu (1 - mu) and z = eta + (y - mu) / (mu (1 - mu)).  Those forms
    are only valid for the logit link, so any other link (e.g. ProbitLink)
    is routed through the generic GLMFamily formulas, which use the link's
    own derivative.
    """

    name = "binomial"

    def __init__(self, link=None):
        """Store ``link``; a LogitLink instance is used when none is given."""
        self.link = link if link is not None else LogitLink()

    def variance(self, mu):
        """Return V(mu) = mu * (1 - mu)."""
        return mu * (1 - mu)

    def _uses_logit(self):
        """Return True when the logit closed forms apply to the current link."""
        return isinstance(self.link, LogitLink)

    def irls_weights(self, mu, y):
        """Return IRLS weights with ``mu`` clamped to [1e-10, 1 - 1e-10]."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        if self._uses_logit():
            return mu_c * (1 - mu_c)
        # Non-canonical link: W = 1 / (V(mu) * g'(mu)^2).
        return super().irls_weights(mu_c, y)

    def irls_working_response(self, mu, y, eta):
        """Return the working response with ``mu`` clamped away from 0 and 1."""
        mu_c = _clip(mu, 1e-10, 1 - 1e-10)
        if self._uses_logit():
            var = mu_c * (1 - mu_c)
            return eta + (y - mu_c) / var
        # Non-canonical link: z = eta + (y - mu) * g'(mu).
        return super().irls_working_response(mu_c, y, eta)
254
+
255
+
256
class Poisson(GLMFamily):
    """Poisson family with log link (count regression)."""

    name = "poisson"
    link = LogLink()

    def variance(self, mu):
        """Return V(mu) = mu."""
        return mu

    def irls_weights(self, mu, y):
        """Return W = mu, floored at 1e-10."""
        floored_mu = _clip(mu, 1e-10, None)
        return floored_mu

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu, with only the divisor floored."""
        resid = y - mu
        return eta + resid / _clip(mu, 1e-10, None)
270
+
271
+
272
class Gamma(GLMFamily):
    """Gamma family for positive continuous outcomes.

    The log link is the default (chosen for numerical stability); the
    canonical link is inverse_power.  IRLS weights and working response
    come from the generic GLMFamily formulas.
    """

    name = "gamma"

    def __init__(self, link=None):
        """Store ``link``; a LogLink instance is used when none is given."""
        if link is None:
            link = LogLink()
        self.link = link

    def variance(self, mu):
        """Return V(mu) = mu^2."""
        return mu * mu
285
+
286
+
287
class InverseGaussian(GLMFamily):
    """Inverse Gaussian family (positive, right-skewed outcomes).

    Uses the log link by default for numerical stability.
    """

    name = "inverse_gaussian"
    link = LogLink()

    def variance(self, mu):
        """Return V(mu) = mu^3."""
        return mu * mu * mu

    def irls_weights(self, mu, y):
        """Return W = 1 / mu with ``mu`` floored at 1e-10."""
        floored_mu = _clip(mu, 1e-10, None)
        return _ones_like(mu) / floored_mu

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu using the floored mean.

        With the log link g'(mu) = 1 / mu, so this is eta + (y - mu) * g'(mu).
        """
        floored_mu = _clip(mu, 1e-10, None)
        relative_resid = (y - floored_mu) / floored_mu
        return eta + relative_resid
307
+
308
+
309
class NegativeBinomial(GLMFamily):
    """Negative Binomial family for overdispersed counts (log link).

    The dispersion parameter ``alpha`` enters the variance as
    Var(Y) = mu + alpha * mu^2; as alpha -> 0 this approaches Poisson.
    """

    name = "negative_binomial"
    link = LogLink()

    def __init__(self, alpha=1.0):
        """Validate and store ``alpha``.

        Raises:
            ValueError: if ``alpha`` is not finite or is not strictly positive.
        """
        if not np.isfinite(alpha) or alpha <= 0.0:
            raise ValueError("alpha must be a finite positive scalar for negative binomial family")
        self.alpha = alpha

    def variance(self, mu):
        """Return V(mu) = mu + alpha * mu^2."""
        quadratic = self.alpha * mu * mu
        return mu + quadratic

    def irls_weights(self, mu, y):
        """Return W = mu / (1 + alpha * mu) with ``mu`` floored at 1e-10."""
        floored_mu = _clip(mu, 1e-10, None)
        overdispersion = 1.0 + self.alpha * floored_mu
        return floored_mu / overdispersion

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu using the floored mean."""
        floored_mu = _clip(mu, 1e-10, None)
        relative_resid = (y - floored_mu) / floored_mu
        return eta + relative_resid
334
+
335
+
336
class Tweedie(GLMFamily):
    """Tweedie family with power variance function V(mu) = mu^power (log link).

    Special cases of ``power``:
        - 0: Gaussian
        - 1: Poisson
        - 2: Gamma
        - strictly between 1 and 2: compound Poisson-Gamma (most common usage)
    """

    name = "tweedie"
    link = LogLink()

    def __init__(self, power=1.5):
        """Store the variance power; the value is not validated here."""
        self.power = power

    def variance(self, mu):
        """Return mu^power with ``mu`` floored at 1e-10."""
        floored_mu = _clip(mu, 1e-10, None)
        return floored_mu ** self.power

    def irls_weights(self, mu, y):
        """Return W = mu^(2 - power) with ``mu`` floored at 1e-10.

        Follows from W = 1 / (V(mu) * g'(mu)^2) = 1 / (mu^p * (1/mu)^2).
        """
        exponent = 2.0 - self.power
        return _clip(mu, 1e-10, None) ** exponent

    def irls_working_response(self, mu, y, eta):
        """Return z = eta + (y - mu) / mu using the floored mean."""
        floored_mu = _clip(mu, 1e-10, None)
        relative_resid = (y - floored_mu) / floored_mu
        return eta + relative_resid
@@ -0,0 +1,149 @@
1
+ """GLM-specific fused loss+gradient functions.
2
+
3
+ These avoid redundant X @ coef computation by computing value and gradient
4
+ in a single pass. They are called by GLMLoss subclasses' fused_value_and_gradient()
5
+ methods for performance.
6
+
7
+ NOT part of the generic solver interface — these are GLM internal optimizations.
8
+ """
9
+
10
+ from statgpu.backends._utils import _to_float_scalar, _get_xp
11
+ from statgpu.backends import _to_numpy
12
+
13
+
14
def _fused_logistic(eta, X, y, n, loss):
    """Logistic loss value and gradient from a precomputed ``eta = X @ coef``.

    value = sum(-y * eta + softplus(eta)) / n
    grad  = X.T @ (sigmoid(eta) - y) / n

    ``loss`` is accepted for a uniform kernel signature and is not read.
    """
    from statgpu.backends._array_ops import _sigmoid, _softplus, _sum
    prob = _sigmoid(eta)
    per_sample = _softplus(eta) - y * eta
    value = _sum(per_sample) / n
    gradient = X.T @ (prob - y) / n
    return value, gradient
21
+
22
+
23
def _fused_poisson(eta, X, y, n, loss):
    """Poisson (log link) loss value and gradient from a precomputed ``eta``.

    mu = exp(clip(eta, -30, 30)); the log term uses mu floored at 1e-10.
    value = sum(mu - y * log(mu)) / n
    grad  = X.T @ (mu - y) / n

    ``loss`` is accepted for a uniform kernel signature and is not read.
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum
    mean = _exp(_clip(eta, -30, 30))
    log_mean = _log(_clip(mean, 1e-10, None))
    value = _sum(mean - y * log_mean) / n
    gradient = X.T @ (mean - y) / n
    return value, gradient
30
+
31
+
32
def _fused_gamma(eta, X, y, n, loss):
    """Gamma loss value and gradient from a precomputed ``eta = X @ coef``.

    The link is read from ``loss.link_name`` (falling back to ``loss.link``,
    then ``"log"``).

    inverse_power link (eta clamped to [loss._ETA_LO, loss._ETA_HI]):
        value = sum(y * eta - log(eta)) / n,  grad = X.T @ (y - 1/eta) / n
    log link (mu = exp(clip(eta, -30, 30)) clamped to [loss._MU_LO, loss._MU_HI]):
        value = sum(y / mu + log(mu)) / n,    grad = X.T @ ((mu - y) / mu) / n

    Clamp bounds are taken from the loss object so that this fused kernel
    evaluates the same objective as the loss's per-sample formulas.  When the
    loss does not define them, the fallbacks are eta in [1e-4, 1e3] (the
    bounds used by the inverse-power link elsewhere in the package) and
    mu >= 1e-10 with no upper bound.
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum
    gamma_link = getattr(loss, "link_name", getattr(loss, "link", "log"))
    if gamma_link == "inverse_power":
        eta_lo = float(getattr(loss, "_ETA_LO", 1e-4))
        eta_hi = float(getattr(loss, "_ETA_HI", 1e3))
        eta_c = _clip(eta, eta_lo, eta_hi)
        mu = 1.0 / eta_c
        val = _sum(y * eta_c - _log(eta_c)) / n
        grad = X.T @ (y - mu) / n
        return val, grad

    # Log link: honour the loss's own mean clamp when it declares one.
    mu_lo = float(getattr(loss, "_MU_LO", 1e-10))
    mu_hi = getattr(loss, "_MU_HI", None)
    if mu_hi is not None:
        mu_hi = float(mu_hi)
    mu_c = _clip(_exp(_clip(eta, -30, 30)), mu_lo, mu_hi)
    val = _sum(y / mu_c + _log(mu_c)) / n
    grad = X.T @ ((mu_c - y) / mu_c) / n
    return val, grad
48
+
49
+
50
def _fused_negative_binomial(eta, X, y, n, loss):
    """Negative binomial (log link) loss value and gradient from ``eta``.

    With a = ``loss.alpha`` (1.0 when absent) and
    mu = exp(clip(eta, -30, 30)) floored at 1e-300:
        value = sum(-y * log(mu / (1 + a*mu)) + log(1 + a*mu) / a) / n
        grad  = X.T @ ((mu - y) / (1 + a*mu)) / n
    """
    from statgpu.backends._array_ops import _exp, _log, _clip, _sum
    dispersion = float(getattr(loss, "alpha", 1.0))
    mean = _clip(_exp(_clip(eta, -30, 30)), 1e-300, None)
    scale = 1.0 + dispersion * mean
    per_sample = -y * _log(mean / scale) + (1.0 / dispersion) * _log(scale)
    value = _sum(per_sample) / n
    gradient = X.T @ ((mean - y) / scale) / n
    return value, gradient
61
+
62
+
63
def _fused_tweedie(eta, X, y, n, loss):
    """Tweedie (log link) loss value and gradient from a precomputed ``eta``.

    With p = ``loss.power`` (1.5 when absent) and
    mu = exp(clip(eta, -50, 50)) clamped to [1e-10, 1e6]:
        value = sum(-y * mu^(1-p)/(1-p) + mu^(2-p)/(2-p)) / n
        grad  = X.T @ (mu^(1-p) * (mu - y)) / n

    When 1-p (resp. 2-p) is within 0.01 of zero, the corresponding power
    term is replaced by its logarithmic form -y*log(mu) (resp. log(mu)) to
    avoid dividing by a near-zero exponent.
    """
    from statgpu.backends._array_ops import _exp, _clip, _sum, _log
    power = float(getattr(loss, "power", 1.5))
    mean = _clip(_exp(_clip(eta, -50, 50)), 1e-10, 1e6)
    log_mean = _log(mean)
    one_minus_p = 1.0 - power
    two_minus_p = 2.0 - power

    y_term = (
        -y * log_mean
        if abs(one_minus_p) < 0.01
        else -y * mean**one_minus_p / one_minus_p
    )
    mu_term = (
        log_mean
        if abs(two_minus_p) < 0.01
        else mean**two_minus_p / two_minus_p
    )

    value = _sum(y_term + mu_term) / n
    gradient = X.T @ (mean**one_minus_p * (mean - y)) / n
    return value, gradient
82
+
83
+
84
def _fused_inverse_gaussian(eta, X, y, n, loss):
    """Inverse Gaussian (log link) loss value and gradient from ``eta``.

    mu = exp(clip(eta, -30, 30)) clamped to [5e-2, 1e3]:
        value = sum(y / (2 mu^2) - 1/mu) / n
        grad  = X.T @ ((mu - y) / mu^2) / n

    ``loss`` is accepted for a uniform kernel signature and is not read.
    """
    from statgpu.backends._array_ops import _exp, _clip, _sum
    mean = _clip(_exp(_clip(eta, -30, 30)), 5e-2, 1e3)
    mean_sq = mean * mean
    value = _sum(y / (2.0 * mean_sq) - 1.0 / mean) / n
    gradient = X.T @ ((mean - y) / mean_sq) / n
    return value, gradient
91
+
92
+
93
# Loss name -> fused kernel.  Built once at import time so the lookup in
# _fused_glm_value_and_gradient does not rebuild the mapping on every call.
_FUSED_DISPATCH = dict(
    logistic=_fused_logistic,
    poisson=_fused_poisson,
    gamma=_fused_gamma,
    negative_binomial=_fused_negative_binomial,
    tweedie=_fused_tweedie,
    inverse_gaussian=_fused_inverse_gaussian,
)
102
+
103
+
104
def _fused_glm_value_and_gradient(loss, X, y, coef):
    """Return ``(value, gradient)`` for ``loss``, using a fused kernel if one exists.

    The kernel is looked up in ``_FUSED_DISPATCH`` by ``loss.name``.  When a
    kernel is registered, ``X @ coef`` is computed once and handed to it
    together with ``n = X.shape[0]``.  Otherwise the result is
    ``(loss.value(X, y, coef), loss.gradient(X, y, coef))``; the linear
    predictor is deliberately not computed on that path because the generic
    methods would not reuse it.
    """
    fused_kernel = _FUSED_DISPATCH.get(getattr(loss, "name", ""))
    if fused_kernel is None:
        return loss.value(X, y, coef), loss.gradient(X, y, coef)
    n = X.shape[0]
    eta = X @ coef
    return fused_kernel(eta, X, y, n, loss)
112
+
113
+
114
def _weighted_loss_and_grad(loss, X, y, coef, sample_weight):
    """Return ``(value, gradient)`` of ``loss`` under per-sample weights.

    Resolution order:
      1. ``loss.name == "squared_error"``: closed form
         value = 0.5 * sum(w * r^2) / sum(w), grad = X.T @ (w * r) / sum(w),
         with r = X @ coef - y.
      2. ``loss.fused_value_and_gradient(..., sample_weight=...)`` if present.
      3. ``loss.value`` / ``loss.gradient`` with ``sample_weight``.
      4. ``loss.value`` / ``loss.gradient`` without weights.

    Steps 2 and 3 fall through on ``TypeError``, which is how a loss that does
    not accept ``sample_weight`` is detected.  NOTE(review): a ``TypeError``
    raised for another reason inside those calls is also absorbed and the
    next option is tried.

    The weight conversion (host round trip and backend resolution) is only
    performed for the squared-error path, the only path that consumes it.
    """
    loss_name = getattr(loss, "name", "")
    if loss_name == "squared_error":
        from statgpu.backends import _resolve_backend

        xp = _get_xp(_resolve_backend("auto", X))
        sw_host = _to_numpy(sample_weight)
        # Only torch tensors get an explicit device, matching the torch-only
        # guard used for array creation elsewhere in the package; other
        # array types also expose ``.device``, but their ``asarray`` is
        # called without a device argument.
        if type(X).__module__.startswith("torch"):
            sw = xp.asarray(sw_host, dtype=X.dtype, device=X.device)
        else:
            sw = xp.asarray(sw_host, dtype=X.dtype)
        sw_sum = _to_float_scalar(xp.sum(sw))

        resid = X @ coef - y
        grad = X.T @ (sw * resid) / sw_sum
        val = 0.5 * _to_float_scalar(xp.sum(sw * resid * resid)) / sw_sum
        return val, grad

    if hasattr(loss, "fused_value_and_gradient"):
        try:
            return loss.fused_value_and_gradient(
                X, y, coef, sample_weight=sample_weight
            )
        except TypeError:
            # Fused method does not take sample_weight; try the generic API.
            pass

    try:
        val = loss.value(X, y, coef, sample_weight=sample_weight)
        grad = loss.gradient(X, y, coef, sample_weight=sample_weight)
        return val, grad
    except TypeError:
        # Loss has no weighted API at all: fall back to the unweighted one.
        val = loss.value(X, y, coef)
        grad = loss.gradient(X, y, coef)
        return val, grad
@@ -0,0 +1,111 @@
1
+ """
2
+ Gamma loss: negative Gamma log-likelihood.
3
+
4
+ For positive continuous outcomes:
5
+ loss = (1/n) * sum(y/mu + log(mu))
6
+ where mu is determined by the configured link:
7
+ - log: mu = exp(X @ coef)
8
+ - inverse_power: mu = 1 / (X @ coef)
9
+
10
+ Supports numpy / cupy / torch backends via _array_ops helpers.
11
+ """
12
+ from statgpu.backends._array_ops import _clip, _exp, _log, _sum, _max_eigval_power, _xp
13
+ from statgpu.glm_core._base import GLMLoss, register_glm_loss
14
+
15
+
16
@register_glm_loss('gamma')
class GammaLoss(GLMLoss):
    """Gamma loss with a ``"log"`` or ``"inverse_power"`` link.

    Per-sample value is y/mu + log(mu); with the inverse-power link this is
    written as y*eta - log(eta).  eta and mu are clamped to the class-level
    bounds below before use.
    """

    name = "gamma"
    y_type = "positive"
    smooth_gradient = True
    has_hessian = True
    _lipschitz_uses_y = True
    _lipschitz_safety = 3.0  # Gamma Hessian varies with mu
    _conservative_momentum_with_nonsmooth = True
    _gamma_like = True

    # Clamp ranges for the mean (log link) and for eta (inverse-power link).
    _MU_LO = 1e-3
    _MU_HI = 1e4
    _ETA_LO = 1e-4
    _ETA_HI = 1e3

    def __init__(self, link="log"):
        """Store the link name and the link-dependent flags.

        Raises:
            ValueError: if ``link`` is neither ``"log"`` nor ``"inverse_power"``.
        """
        if link not in ("log", "inverse_power"):
            raise ValueError(
                "GammaLoss link must be 'log' or 'inverse_power', "
                f"got {link!r}."
            )
        self.link = link
        self.link_name = link
        self._lipschitz_at_init = link == "inverse_power"
        self._has_constant_hessian = (link == "log")

    def _eta_mu(self, X, coef):
        """Return the clamped linear predictor and the matching clamped mean."""
        raw_eta = X @ coef
        if self.link != "inverse_power":
            z = _clip(raw_eta, -30, 30)
            return z, _clip(_exp(z), self._MU_LO, self._MU_HI)
        eta_c = _clip(raw_eta, self._ETA_LO, self._ETA_HI)
        return eta_c, 1.0 / eta_c

    def _mu_from_eta(self, eta):
        """Map ``eta`` to the clamped mean for the configured link."""
        if self.link != "inverse_power":
            return _clip(_exp(_clip(eta, -30, 30)), self._MU_LO, self._MU_HI)
        return 1.0 / _clip(eta, self._ETA_LO, self._ETA_HI)

    # ── Per-sample formulas (single source of truth) ──────────────────

    def per_sample_value(self, eta, y):
        """Return the per-sample loss for linear predictor ``eta``."""
        if self.link != "inverse_power":
            mu = self._mu_from_eta(eta)
            return y / mu + _log(mu)
        eta_c = _clip(eta, self._ETA_LO, self._ETA_HI)
        return y * eta_c - _log(eta_c)

    def per_sample_gradient(self, eta, y):
        """Return the per-sample derivative of the loss with respect to eta."""
        mu = self._mu_from_eta(eta)
        if self.link == "inverse_power":
            return y - mu
        return 1.0 - y / mu

    def hessian(self, X, y, coef, sample_weight=None):
        """Return X.T @ diag(W) @ X / n_eff.

        W is 1/eta^2 for the inverse-power link and a vector of ones for the
        log link (expected Fisher information); ``sample_weight`` multiplies
        W when given, and n_eff is its sum (else the number of rows).
        """
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            n_eff = float(sample_weight.sum())

        if self.link == "inverse_power":
            eta_c, _ = self._eta_mu(X, coef)
            weights = 1.0 / (eta_c * eta_c)
        else:
            # Expected Fisher: W(mu) = 1 for Gamma with log link
            xp = _xp(X)
            if xp.__name__ == "torch":
                weights = xp.ones(X.shape[0], dtype=X.dtype, device=X.device)
            else:
                weights = xp.ones(X.shape[0], dtype=X.dtype)

        if sample_weight is not None:
            weights = weights * sample_weight
        return X.T @ (X * weights[:, None]) / n_eff

    def lipschitz(self, X, coef, y=None, sample_weight=None):
        """Return a gradient Lipschitz estimate, floored at 1e-8.

        The estimate is the largest eigenvalue (power method) of
        X.T @ diag(W) @ X / n_eff with W = 1/eta^2 (inverse-power link) or
        W = y/mu (log link, ``y`` given).  With the log link and no ``y`` the
        unweighted X.T @ X / n_eff is used and ``sample_weight`` only enters
        through n_eff.
        """
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            n_eff = float(sample_weight.sum())

        if self.link == "inverse_power":
            eta_c, _ = self._eta_mu(X, coef)
            weights = 1.0 / (eta_c * eta_c)
        elif y is None:
            gram = X.T @ X
            return max(_max_eigval_power(gram) / n_eff, 1e-8)
        else:
            z = _clip(X @ coef, -30, 30)
            weights = y / _clip(_exp(z), self._MU_LO, self._MU_HI)

        if sample_weight is not None:
            weights = weights * sample_weight
        weighted_gram = X.T @ (X * weights[:, None])
        return max(_max_eigval_power(weighted_gram) / n_eff, 1e-8)

    def predict(self, X, coef):
        """Return the predicted mean.

        The inverse-power link clamps eta before inverting; the log link
        returns exp(X @ coef) without the mean clamp used during fitting.
        """
        if self.link != "inverse_power":
            return _exp(X @ coef)
        eta_c = _clip(X @ coef, self._ETA_LO, self._ETA_HI)
        return 1.0 / eta_c
@@ -0,0 +1,62 @@
1
+ """
2
+ Inverse Gaussian loss: negative log-likelihood with log link.
3
+
4
+ For positive right-skewed outcomes:
5
+ loss = (1/n) * sum(y/(2*mu^2) - 1/mu)
6
+ where mu = exp(X @ coef).
7
+
8
+ Supports numpy / cupy / torch backends via _array_ops helpers.
9
+ """
10
+ from statgpu.backends._array_ops import _clip, _exp, _sum, _max_eigval_power
11
+ from statgpu.glm_core._base import GLMLoss, register_glm_loss
12
+
13
+
14
@register_glm_loss('inverse_gaussian')
class InverseGaussianLoss(GLMLoss):
    """Inverse Gaussian loss with log link.

    Per-sample value is y / (2 mu^2) - 1/mu with
    mu = exp(clip(eta, -30, 30)) clamped to [_MU_LO, _MU_HI].
    """

    name = "inverse_gaussian"
    y_type = "positive"
    smooth_gradient = True
    has_hessian = True
    _lipschitz_uses_y = True
    _lipschitz_safety = 3.0  # 1/mu^3 gradient scaling requires safety factor
    _skip_momentum = True
    _inverse_gaussian = True  # 1/mu^3 scaling causes Nesterov oscillation

    # Clamp range applied to the mean.
    _MU_LO = 5e-2
    _MU_HI = 1e3

    def _mu_from_eta(self, eta):
        """Map ``eta`` to the clamped mean."""
        bounded_eta = _clip(eta, -30, 30)
        return _clip(_exp(bounded_eta), self._MU_LO, self._MU_HI)

    # ── Per-sample formulas (single source of truth) ──────────────────

    def per_sample_value(self, eta, y):
        """Return the per-sample loss y / (2 mu^2) - 1/mu."""
        mu = self._mu_from_eta(eta)
        return y / (2.0 * mu * mu) - 1.0 / mu

    def per_sample_gradient(self, eta, y):
        """Return the per-sample derivative with respect to eta: (mu - y) / mu^2."""
        mu = self._mu_from_eta(eta)
        return (mu - y) / (mu * mu)

    def hessian(self, X, y, coef, sample_weight=None):
        """Return X.T @ diag(W) @ X / n_eff with W = 1/mu.

        ``sample_weight`` multiplies W when given, and n_eff is its sum
        (else the number of rows).  ``y`` is not used.
        """
        mu = _clip(_exp(_clip(X @ coef, -30, 30)), self._MU_LO, self._MU_HI)
        weights = 1.0 / mu
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            weights = weights * sample_weight
            n_eff = float(sample_weight.sum())
        return X.T @ (X * weights[:, None]) / n_eff

    def lipschitz(self, X, coef, y=None, sample_weight=None):
        """Return a gradient Lipschitz estimate, floored at 1e-8.

        The estimate is the largest eigenvalue (power method) of
        X.T @ diag(W) @ X / n_eff with W = 1/mu.  ``y`` is not used.
        """
        mu = _clip(_exp(_clip(X @ coef, -30, 30)), self._MU_LO, self._MU_HI)
        weights = 1.0 / mu
        if sample_weight is None:
            n_eff = X.shape[0]
        else:
            weights = weights * sample_weight
            n_eff = float(sample_weight.sum())
        weighted_gram = X.T @ (X * weights[:, None])
        return max(_max_eigval_power(weighted_gram) / n_eff, 1e-8)

    def predict(self, X, coef):
        """Return exp(X @ coef), without the mean clamp used during fitting."""
        return _exp(X @ coef)