statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,214 @@
1
+ """
2
+ GPU utility functions for full GPU computation.
3
+ All statistical computations on GPU.
4
+ """
5
+
6
+ import numpy as np
7
+
8
+ from statgpu.inference._distributions_backend import (
9
+ norm,
10
+ t,
11
+ regularized_betainc_gpu,
12
+ )
13
+
14
+
15
def t_two_tail_pvalues_gpu(t_abs, df_resid):
    """Legacy wrapper returning two-sided Student-t p-values.

    Forwards ``t_abs`` to ``t.two_sided_pvalue`` with ``df_resid`` passed
    as the ``df`` keyword.
    """
    pvalues = t.two_sided_pvalue(t_abs, df=df_resid)
    return pvalues
18
+
19
+
20
def t_crit_gpu_two_tail(alpha, df_resid, *, max_bisect_steps: int = 60):
    """Legacy wrapper returning the two-sided Student-t critical value.

    Forwards to ``t.two_sided_critical_value`` with ``df_resid`` passed as
    ``df`` and ``max_bisect_steps`` passed through unchanged.
    """
    options = {"df": df_resid, "max_bisect_steps": max_bisect_steps}
    return t.two_sided_critical_value(alpha, **options)
27
+
28
+
29
def norm_two_tail_pvalues_gpu(z_abs):
    """Legacy wrapper returning two-sided normal p-values.

    Forwards ``z_abs`` unchanged to ``norm.two_sided_pvalue``.
    """
    pvalues = norm.two_sided_pvalue(z_abs)
    return pvalues
32
+
33
+
34
def norm_crit_gpu_two_tail(alpha):
    """Legacy wrapper returning the two-sided normal critical value.

    Forwards ``alpha`` unchanged to ``norm.two_sided_critical_value``.
    """
    critical_value = norm.two_sided_critical_value(alpha)
    return critical_value
37
+
38
+
39
def compute_inference_gpu(X_design, resid, scale, df_resid, params_gpu):
    """
    Compute standard errors, t-values, p-values and 95% confidence intervals.

    Parameters
    ----------
    X_design : cupy.ndarray
        Design matrix on GPU.
    resid : cupy.ndarray
        Residuals on GPU. Not read by this function; kept so the signature
        stays compatible with existing callers.
    scale : float or cupy.ndarray
        Error variance estimate.
    df_resid : int
        Residual degrees of freedom.
    params_gpu : cupy.ndarray
        Parameters on GPU.

    Returns
    -------
    bse_gpu : cupy.ndarray
        Standard errors, ``sqrt(scale * diag((X'X)^-1))``.
    tvalues_gpu : cupy.ndarray
        t-statistics, ``params_gpu / bse_gpu``.
    pvalues_gpu : cupy.ndarray
        Two-sided p-values from ``t.two_sided_pvalue``.
    conf_int_gpu : cupy.ndarray
        Lower and upper 95% bounds stacked along axis 1.
    """
    import cupy as cp

    # Gram matrix X'X.
    XtX = cp.matmul(X_design.T, X_design)

    # CuPy's cuSOLVER-backed routines may hand back NaN/Inf for a singular
    # input instead of raising (see the CuPy linalg docs on cupyx.errstate),
    # so an exception handler alone does not reliably trigger the fallback.
    # Check the result explicitly as well.
    try:
        XtX_inv = cp.linalg.inv(XtX)
        inverse_usable = bool(cp.isfinite(XtX_inv).all())
    except Exception:
        # Best-effort: any failure of the direct inverse uses the fallback.
        inverse_usable = False

    if not inverse_usable:
        # Fallback to pseudo-inverse for singular / collinear designs.
        XtX_inv = cp.linalg.pinv(XtX)

    # Standard errors; clip tiny negative variances caused by round-off.
    bse_gpu = cp.sqrt(cp.maximum(scale * cp.diag(XtX_inv), 0.0))

    # t-statistics (epsilon avoids division by zero for collinear features).
    tvalues_gpu = params_gpu / (bse_gpu + 1e-30)

    # Two-sided p-values from the Student-t distribution.
    pvalues_gpu = t.two_sided_pvalue(tvalues_gpu, df=df_resid)

    # Confidence intervals (95%): params +/- t_crit * bse.
    alpha = 0.05
    t_crit_gpu = cp.asarray(
        t.two_sided_critical_value(alpha, df=df_resid),
        dtype=bse_gpu.dtype,
    )

    margin = t_crit_gpu * bse_gpu
    conf_int_gpu = cp.stack([params_gpu - margin, params_gpu + margin], axis=1)

    return bse_gpu, tvalues_gpu, pvalues_gpu, conf_int_gpu
102
+
103
+
104
def compute_r2_gpu(y, resid):
    """
    Compute the coefficient of determination with CuPy.

    Parameters
    ----------
    y : cupy.ndarray
        Observed values.
    resid : cupy.ndarray
        Residuals.

    Returns
    -------
    r2 : float
        ``1 - sum(resid**2) / sum((y - mean(y))**2)`` as a Python float.
    """
    import cupy as cp

    centered = y - y.mean()
    ss_res = cp.sum(resid ** 2)
    ss_tot = cp.sum(centered ** 2)
    unexplained_share = ss_res / ss_tot
    return float(cp.asnumpy(1 - unexplained_share))
127
+
128
+
129
def compute_aic_bic_gpu(n, k, scale):
    """
    Compute AIC and BIC with CuPy.

    Uses ``AIC = n * log(scale) + 2 * k`` and
    ``BIC = n * log(scale) + k * log(n)``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of parameters.
    scale : float or cupy.ndarray
        Error variance (MLE estimate: RSS/n).

    Returns
    -------
    aic : float
        AIC value.
    bic : float
        BIC value.
    """
    import cupy as cp

    # Objects exposing ``get`` are treated as already being CuPy arrays.
    scale_gpu = scale if hasattr(scale, 'get') else cp.array(scale)

    n_obs = cp.array(float(n))
    n_params = cp.array(float(k))

    # Both criteria share the n * log(scale) term.
    fit_term = n_obs * cp.log(scale_gpu)
    aic_gpu = fit_term + 2 * n_params
    bic_gpu = fit_term + n_params * cp.log(n_obs)

    aic = float(cp.asnumpy(aic_gpu))
    bic = float(cp.asnumpy(bic_gpu))
    return aic, bic
164
+
165
+
166
def compute_f_stat_gpu(y, resid, X_design, df_resid):
    """
    Compute the overall F-statistic and its p-value with CuPy.

    Parameters
    ----------
    y : cupy.ndarray
        True values on GPU.
    resid : cupy.ndarray
        Residuals on GPU.
    X_design : cupy.ndarray
        Design matrix on GPU; one column is assumed to be the intercept.
    df_resid : int
        Residual degrees of freedom.

    Returns
    -------
    fvalue : float
        F-statistic.
    pvalue : float
        Upper-tail p-value of the F-statistic.

    Notes
    -----
    A ``(fvalue, pvalue)`` pair is returned on every path. When there are no
    non-intercept columns or the residual sum of squares is not positive, the
    result is ``(inf, 1.0)``, matching ``compute_f_stat_torch``.
    """
    import cupy as cp

    y_mean = y.mean()
    ss_tot = cp.sum((y - y_mean) ** 2)
    ss_res = cp.sum(resid ** 2)
    ss_reg = ss_tot - ss_res

    k = X_design.shape[1] - 1  # exclude intercept
    if k == 0 or ss_res <= 0:
        # Previously a bare ``np.inf`` was returned here, which broke callers
        # unpacking two values.
        # NOTE(review): p=1.0 mirrors the torch implementation; for a perfect
        # fit (ss_res == 0) a p-value of 0 may be more appropriate - confirm.
        return np.inf, 1.0

    fvalue_gpu = (ss_reg / k) / (ss_res / df_resid)
    fvalue = float(cp.asnumpy(fvalue_gpu))

    # p-value via the F CDF expressed through the regularized incomplete beta.
    #
    # For F ~ F(d1, d2):
    #   CDF(x) = I_{ d1 x / (d1 x + d2) }(d1/2, d2/2)
    #   pvalue = 1 - CDF
    d1 = float(k)
    d2 = float(df_resid)
    if d2 <= 0 or d1 <= 0:
        pvalue = 1.0
    else:
        z = (d1 * fvalue) / (d1 * fvalue + d2)
        cdf = regularized_betainc_gpu(d1 / 2.0, d2 / 2.0, cp.asarray(z))
        pvalue = float(1.0 - cp.asnumpy(cdf))

    return fvalue, pvalue
@@ -0,0 +1,422 @@
1
+ """
2
+ Torch-specific GPU utility functions for full GPU computation.
3
+
4
+ This module mirrors _gpu_inference_cupy.py but uses PyTorch operations instead of CuPy.
5
+ All statistical computations run on GPU via Torch.
6
+ """
7
+
8
+ import numpy as np
9
+
10
+ from statgpu.backends import _get_torch_device_str as _get_torch_device
11
+
12
+
13
+ def _import_torch():
14
+ """Deferred torch import."""
15
+ try:
16
+ import torch
17
+ return torch
18
+ except ImportError as exc:
19
+ raise RuntimeError("PyTorch (torch) is required for Torch backend") from exc
20
+
21
+
22
def t_two_tail_pvalues_torch(t_abs, df_resid, device=None):
    """
    Legacy wrapper returning two-sided Student-t p-values via the Torch backend.

    Parameters
    ----------
    t_abs : torch.Tensor or array-like
        Absolute t-statistics.
    df_resid : int or float
        Residual degrees of freedom, forwarded as ``df``.
    device : str, optional
        Torch device string passed to ``get_distribution``.

    Returns
    -------
    torch.Tensor
        Two-sided p-values.
    """
    from statgpu.inference._distributions_backend import get_distribution

    student_t = get_distribution("t", backend="torch", device=device)
    pvalues = student_t.two_sided_pvalue(t_abs, df=df_resid)
    return pvalues
43
+
44
+
45
def t_crit_torch_two_tail_torch(alpha, df_resid, *, max_bisect_steps=60, device=None):
    """
    Legacy wrapper returning the two-sided Student-t critical value via Torch.

    Parameters
    ----------
    alpha : float
        Significance level (e.g., 0.05 for 95% CI).
    df_resid : int or float
        Residual degrees of freedom, forwarded as ``df``.
    max_bisect_steps : int, default=60
        Forwarded unchanged to ``two_sided_critical_value``.
    device : str, optional
        Torch device string passed to ``get_distribution``.

    Returns
    -------
    torch.Tensor
        Critical t-value.
    """
    from statgpu.inference._distributions_backend import get_distribution

    student_t = get_distribution("t", backend="torch", device=device)
    options = {"df": df_resid, "max_bisect_steps": max_bisect_steps}
    return student_t.two_sided_critical_value(alpha, **options)
68
+
69
+
70
def norm_two_tail_pvalues_torch(z_abs, device=None):
    """
    Legacy wrapper returning two-sided normal p-values via the Torch backend.

    Parameters
    ----------
    z_abs : torch.Tensor or array-like
        Absolute z-statistics.
    device : str, optional
        Torch device string, forwarded to ``norm.two_sided_pvalue``.

    Returns
    -------
    torch.Tensor
        Two-sided p-values.
    """
    from statgpu.inference._distributions_backend import norm

    pvalues = norm.two_sided_pvalue(z_abs, backend="torch", device=device)
    return pvalues
88
+
89
+
90
def norm_crit_torch_two_tail_torch(alpha, device=None):
    """
    Backward-compatible alias for two-sided normal critical value on Torch GPU.

    Parameters
    ----------
    alpha : float
        Significance level (e.g., 0.05 for 95% CI).
    device : str, optional
        Torch device string. Forwarded only when given; with the default
        ``None`` the call is the same as before.

    Returns
    -------
    torch.Tensor
        Critical z-value.
    """
    from statgpu.inference._distributions_backend import norm

    call_kwargs = {"backend": "torch"}
    if device is not None:
        # The argument used to be accepted but silently dropped.
        # NOTE(review): assumes two_sided_critical_value takes ``device`` the
        # same way two_sided_pvalue does - confirm against
        # _distributions_backend.
        call_kwargs["device"] = device
    return norm.two_sided_critical_value(alpha, **call_kwargs)
108
+
109
+
110
def compute_inference_torch(X_design, resid, scale, df_resid, params_torch, cov_type="nonrobust", device=None):
    """
    Compute standard errors, t-values, p-values, and confidence intervals with Torch.

    Parameters
    ----------
    X_design : torch.Tensor
        Design matrix.
    resid : torch.Tensor
        Residuals; indexed as ``resid[:, None]``-compatible 1-D weights for
        the robust covariance types.
    scale : float or torch.Tensor
        Error variance estimate (sigma^2); only used for the nonrobust path.
    df_resid : int
        Residual degrees of freedom.
    params_torch : torch.Tensor
        Parameters.
    cov_type : str, default='nonrobust'
        ``'hc0'``, ``'hc1'``, ``'hc2'`` and ``'hc3'`` use a sandwich
        estimator. Any other value (including ``'nonrobust'`` and ``'hac'``)
        uses ``scale * diag((X'X)^-1)``.
    device : str, optional
        Torch device string used to build the t distribution; defaults to
        the backend's configured device.

    Returns
    -------
    bse_torch : torch.Tensor
        Standard errors.
    tvalues_torch : torch.Tensor
        t-statistics.
    pvalues_torch : torch.Tensor
        Two-sided p-values.
    conf_int_torch : torch.Tensor
        Lower and upper 95% bounds stacked along dim 1.
    """
    torch = _import_torch()

    if device is None:
        device = _get_torch_device()

    from statgpu.inference._distributions_backend import get_distribution
    t_dist = get_distribution("t", backend="torch", device=device)

    # (X'X)^-1 via Cholesky, falling back to the pseudo-inverse when the
    # Gram matrix is not positive definite.
    XtX = torch.matmul(X_design.T, X_design)
    try:
        chol = torch.linalg.cholesky(XtX)
        XtX_inv = torch.cholesky_inverse(chol)
    except torch.linalg.LinAlgError:
        XtX_inv = torch.linalg.pinv(XtX)

    if cov_type in ("hc0", "hc1", "hc2", "hc3"):
        # Per-observation weights for the sandwich "meat".
        e2 = torch.square(resid)

        if cov_type == "hc1":
            # HC1: finite-sample correction n / (n - k).
            n, k = X_design.shape
            df_scale = n / (n - k) if n > k else 1.0
            e2 = e2 * df_scale
        elif cov_type in ("hc2", "hc3"):
            # Leverage h_ii = x_i' (X'X)^-1 x_i, computed row-wise. This
            # avoids a second Cholesky of XtX_inv, which fails when the
            # pseudo-inverse fallback above was taken.
            leverage = torch.sum(torch.matmul(X_design, XtX_inv) * X_design, dim=1)
            # Keep 1 - h_ii strictly positive. A fixed 1e-12 margin rounds
            # away in float32, so never go below the dtype's epsilon.
            margin_eps = max(1e-12, torch.finfo(leverage.dtype).eps)
            leverage = torch.clamp(leverage, 0.0, 1.0 - margin_eps)

            if cov_type == "hc2":
                # HC2: e2 / (1 - h_ii)
                e2 = e2 / (1.0 - leverage)
            else:
                # HC3: e2 / (1 - h_ii)^2
                e2 = e2 / torch.square(1.0 - leverage)
        # HC0 uses the squared residuals unmodified.

        # Sandwich: (X'X)^-1 @ (X' @ diag(e2) @ X) @ (X'X)^-1
        Xw = X_design * e2[:, None]
        meat = torch.matmul(X_design.T, Xw)
        cov_params = torch.matmul(XtX_inv, torch.matmul(meat, XtX_inv))
        bse_torch = torch.sqrt(torch.clamp(torch.diag(cov_params), min=0.0))
    else:
        # Nonrobust: scale * diag((X'X)^-1).
        # NOTE(review): 'hac' is listed as a cov_type but has no dedicated
        # implementation here and lands on this branch - confirm intended.
        bse_torch = torch.sqrt(scale * torch.clamp(torch.diag(XtX_inv), min=0.0))

    # t-statistics (epsilon avoids division by zero).
    tvalues_torch = params_torch / (bse_torch + 1e-30)

    # Two-sided p-values from the Student-t distribution.
    pvalues_torch = t_dist.two_sided_pvalue(tvalues_torch, df=df_resid)

    # Confidence intervals (95%): params +/- t_crit * bse.
    alpha = 0.05
    t_crit = t_dist.two_sided_critical_value(alpha, df=df_resid)

    margin = t_crit * bse_torch
    conf_int_torch = torch.stack([params_torch - margin, params_torch + margin], dim=1)

    return bse_torch, tvalues_torch, pvalues_torch, conf_int_torch
214
+
215
+
216
def compute_r2_torch(y, resid):
    """
    Compute the coefficient of determination with Torch.

    Parameters
    ----------
    y : torch.Tensor
        Observed values.
    resid : torch.Tensor
        Residuals.

    Returns
    -------
    r2 : float
        ``1 - sum(resid**2) / sum((y - mean(y))**2)`` as a Python float.
    """
    torch = _import_torch()

    centered = y - torch.mean(y)
    ss_res = torch.sum(resid ** 2)
    ss_tot = torch.sum(centered ** 2)
    r2 = 1 - ss_res / ss_tot

    # Move the scalar to host memory before converting to a Python float.
    r2_host = r2.cpu().numpy()
    return float(r2_host)
239
+
240
+
241
def compute_aic_bic_torch(n, k, scale, device=None):
    """
    Compute AIC and BIC with Torch.

    Uses ``AIC = n * log(scale) + 2 * k`` and
    ``BIC = n * log(scale) + k * log(n)``.

    Parameters
    ----------
    n : int
        Number of observations.
    k : int
        Number of parameters.
    scale : float or torch.Tensor
        Error variance (MLE estimate: RSS/n).
    device : str, optional
        Torch device string; defaults to the backend's configured device.

    Returns
    -------
    aic : float
        AIC value.
    bic : float
        BIC value.
    """
    torch = _import_torch()

    if device is None:
        device = _get_torch_device()

    # Non-tensor scales are promoted to a float64 tensor on the target device.
    if not isinstance(scale, torch.Tensor):
        scale = torch.tensor(scale, dtype=torch.float64, device=device)

    n_obs = torch.tensor(float(n), dtype=torch.float64, device=device)
    n_params = torch.tensor(float(k), dtype=torch.float64, device=device)

    # Both criteria share the n * log(scale) term.
    fit_term = n_obs * torch.log(scale)
    aic_tensor = fit_term + 2 * n_params
    bic_tensor = fit_term + n_params * torch.log(n_obs)

    aic = float(aic_tensor.cpu().numpy())
    bic = float(bic_tensor.cpu().numpy())
    return aic, bic
281
+
282
+
283
def compute_f_stat_torch(y, resid, X_design, df_resid, device=None):
    """
    Compute the overall F-statistic and its p-value with Torch.

    Parameters
    ----------
    y : torch.Tensor
        True values.
    resid : torch.Tensor
        Residuals.
    X_design : torch.Tensor
        Design matrix; one column is assumed to be the intercept.
    df_resid : int
        Residual degrees of freedom.
    device : str, optional
        Torch device string used to build the F distribution; defaults to
        the backend's configured device.

    Returns
    -------
    fvalue : float
        F-statistic.
    pvalue : float
        Upper-tail p-value, ``1 - CDF(fvalue)``.

    Notes
    -----
    Returns ``(inf, 1.0)`` when there are no non-intercept columns or the
    residual sum of squares is not positive.
    """
    torch = _import_torch()

    if device is None:
        device = _get_torch_device()

    from statgpu.inference._distributions_backend import get_distribution
    f_dist = get_distribution("f", backend="torch", device=device)

    y_mean = torch.mean(y)
    ss_tot = torch.sum((y - y_mean) ** 2)
    ss_res = torch.sum(resid ** 2)
    ss_reg = ss_tot - ss_res

    k = X_design.shape[1] - 1  # exclude intercept

    if k == 0 or ss_res <= 0:
        # NOTE(review): for a perfect fit (ss_res == 0) a p-value of 0 may be
        # more appropriate than 1.0 - confirm the intended convention.
        return float('inf'), 1.0

    fvalue_tensor = (ss_reg / k) / (ss_res / df_resid)
    fvalue = float(fvalue_tensor.cpu().numpy())

    d1 = float(k)
    d2 = float(df_resid)

    if d2 <= 0 or d1 <= 0:
        pvalue = 1.0
    else:
        # The distribution object evaluates the F CDF directly, so the
        # incomplete-beta argument no longer needs to be formed here.
        cdf = f_dist.cdf(fvalue, dfn=d1, dfd=d2)
        pvalue = 1.0 - float(cdf.cpu().numpy())

    return fvalue, pvalue
341
+
342
+
343
def torch_memory_cleanup():
    """
    Release cached CUDA memory held by Torch, if CUDA is available.

    Errors raised while emptying the cache are deliberately ignored; this
    is a best-effort cleanup.
    """
    torch = _import_torch()

    if not torch.cuda.is_available():
        return

    try:
        torch.cuda.empty_cache()
    except Exception:
        # Cleanup failure must not interrupt the caller.
        pass
356
+
357
+
358
def is_torch_tensor(x):
    """Return True when ``x`` is an instance of ``torch.Tensor``."""
    tensor_type = _import_torch().Tensor
    return isinstance(x, tensor_type)
362
+
363
+
364
def to_numpy_from_torch(x):
    """
    Convert a Torch tensor (or other array-like) to a NumPy array.

    Parameters
    ----------
    x : torch.Tensor or array-like
        Tensors on any device are detached and copied to host memory first.
        Non-tensor objects exposing ``get`` (e.g. CuPy arrays) are converted
        through ``x.get()``; anything else goes through ``np.asarray``.

    Returns
    -------
    numpy.ndarray
    """
    torch = _import_torch()

    if isinstance(x, torch.Tensor):
        # ``.cpu()`` is a no-op for tensors already on the CPU. Applying it
        # unconditionally also covers non-CUDA accelerator devices, for which
        # ``.numpy()`` would otherwise fail.
        return x.detach().cpu().numpy()

    # Handle non-tensor inputs
    if hasattr(x, 'get'):  # CuPy array
        return x.get()
    return np.asarray(x)
381
+
382
+
383
def to_torch_from_numpy(x, device=None, dtype=None):
    """
    Convert array-like input to a Torch tensor on the requested device.

    Parameters
    ----------
    x : array-like
        Input data (NumPy, CuPy, Torch tensor, or list).
    device : str, optional
        Target device ('cpu' or 'cuda'); defaults to the backend's
        configured device.
    dtype : torch.dtype, optional
        Target dtype; when omitted the dtype is left as produced.

    Returns
    -------
    torch.Tensor
    """
    torch = _import_torch()
    target = _get_torch_device() if device is None else device

    # Objects exposing ``get`` (e.g. CuPy arrays) are pulled to host first.
    if hasattr(x, 'get'):
        x = x.get()

    if not isinstance(x, torch.Tensor):
        # Go through NumPy, then move to the target device.
        converted = torch.from_numpy(np.asarray(x)).to(target)
        return converted if dtype is None else converted.to(dtype)

    # Existing tensors are only moved or cast when something differs.
    if x.device.type != target:
        x = x.to(target)
    if dtype is not None and x.dtype != dtype:
        x = x.to(dtype)
    return x