statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168):
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,5 @@
1
+ """Spline basis function utilities."""
2
+
3
+ from ._bspline_basis import bspline_basis, natural_cubic_spline_basis
4
+
5
+ __all__ = ['bspline_basis', 'natural_cubic_spline_basis']
@@ -0,0 +1,336 @@
1
+ """
2
+ B-spline and natural cubic spline basis construction with GPU support.
3
+
4
+ Implements De Boor's recursive algorithm for B-spline basis evaluation,
5
+ vectorized over sample points for efficient GPU computation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+
12
+ from statgpu.backends import _LINALG_ERRORS, _torch_dev, _to_numpy, xp_zeros, xp_eye, xp_full, xp_astype, xp_asarray
13
+
14
+
15
def _get_xp(xp):
    """Resolve the array namespace: ``xp`` itself, or NumPy when it is None."""
    if xp is None:
        return np
    return xp
18
+
19
+
20
def bspline_basis(x, knots, degree=3, xp=None, boundary_lo=None, boundary_hi=None):
    """
    Build the B-spline design matrix for the sample points ``x``.

    The basis is evaluated with the Cox-de Boor recursion. Every recursion
    level is computed for all sample points and all basis functions at once,
    so only the loop over the degree remains sequential.

    Parameters
    ----------
    x : array-like, shape (n,)
        Evaluation points (flattened internally).
    knots : array-like, shape (m,)
        Interior knots. They are expected to be increasing; this is not
        checked here.
    degree : int, default=3
        Polynomial degree of the spline (3 = cubic).
    xp : module, optional
        Array module (numpy, cupy, or torch). NumPy is used when None.
    boundary_lo : float, optional
        Lower boundary knot. Defaults to ``min(min(x), min(knots))``.
    boundary_hi : float, optional
        Upper boundary knot. Defaults to ``max(max(x), max(knots))``.

    Returns
    -------
    B : array, shape (n, m + degree + 1)
        One row per sample point, one column per basis function. Points
        outside ``[boundary_lo, boundary_hi]`` fall in no knot interval and
        therefore get an all-zero row.

    Raises
    ------
    ValueError
        If ``knots`` is empty, or if the smallest/largest interior knot is
        not strictly inside ``(boundary_lo, boundary_hi)``.
    """
    xp = _get_xp(xp)

    x = xp.asarray(x, dtype=xp.float64).ravel()
    knots = xp.asarray(knots, dtype=xp.float64).ravel()
    n_points = x.shape[0]

    if knots.shape[0] == 0:
        raise ValueError("At least one interior knot is required")

    knot_min = float(xp.min(knots))
    knot_max = float(xp.max(knots))

    # Default boundaries: the wider of the x range and the knot range.
    # Explicit boundaries let prediction reuse the training range.
    if boundary_lo is None:
        boundary_lo = min(float(xp.min(x)), knot_min)
    if boundary_hi is None:
        boundary_hi = max(float(xp.max(x)), knot_max)

    if knot_min <= boundary_lo or knot_max >= boundary_hi:
        raise ValueError(
            "Interior knots must be strictly within the boundary range "
            f"({boundary_lo}, {boundary_hi})"
        )

    # Augmented knot vector: each boundary repeated (degree + 1) times.
    t = xp.concatenate([
        xp_full(degree + 1, boundary_lo, xp.float64, xp, x),
        knots,
        xp_full(degree + 1, boundary_hi, xp.float64, xp, x),
    ])
    n_intervals = len(t) - 1

    # One host copy of the knots so the Python-level comparisons below do
    # not touch the array backend once per element.
    t_host = _to_numpy(t).tolist()
    spans = list(zip(t_host[:-1], t_host[1:]))

    # The last interval of positive width is closed on the right so that
    # x == boundary_hi still belongs to a basis function.
    closed_idx = max(
        (i for i, (lo, hi) in enumerate(spans) if hi > lo),
        default=-1,
    )

    # Degree 0: indicator of each non-degenerate knot interval.
    B = xp_zeros((n_points, n_intervals), xp.float64, xp, x)
    for i, (lo, hi) in enumerate(spans):
        if hi > lo:
            below_hi = (x <= hi) if i == closed_idx else (x < hi)
            B[:, i] = xp_astype((x >= lo) & below_hi, xp.float64, xp)

    # Raise the degree one step at a time; level k needs level k - 1.
    x_col = x[:, None]
    for k in range(1, degree + 1):
        width = n_intervals - k

        # Knot values t_i, t_{i+k}, t_{i+1}, t_{i+1+k} for i = 0..width-1.
        left_lo = t[:width]
        left_hi = t[k:k + width]
        right_lo = t[1:width + 1]
        right_hi = t[k + 1:k + 1 + width]

        span_left = left_hi - left_lo
        span_right = right_hi - right_lo
        has_left = span_left > 0
        has_right = span_right > 0

        # Zero-width spans are replaced by 1 so the division is defined;
        # the corresponding weights are forced to 0 just below.
        safe_left = xp.where(has_left, span_left, 1.0)
        safe_right = xp.where(has_right, span_right, 1.0)

        # Weights of shape (n, width): x broadcast against the knot arrays.
        w_left = xp.where(
            has_left[None, :],
            (x_col - left_lo[None, :]) / safe_left[None, :],
            0.0,
        )
        w_right = xp.where(
            has_right[None, :],
            (x_col - right_lo[None, :]) / safe_right[None, :],
            0.0,
        )

        B = w_left * B[:, :width] + (1.0 - w_right) * B[:, 1:width + 1]

    # After `degree` steps exactly m + degree + 1 columns remain.
    return B
146
+
147
+
148
def natural_cubic_spline_basis(x, knots, xp=None):
    """
    Natural cubic spline basis (zero second derivative at both boundaries).

    A cubic B-spline basis is built with boundary knots ``min(x)`` and
    ``max(x)``. Two linear constraints, one per boundary, require the second
    derivative of the fitted spline to vanish there. The returned basis is
    the cubic basis multiplied by an orthonormal basis of the null space of
    those constraints, so any coefficient vector gives a spline that meets
    them.

    Parameters
    ----------
    x : array-like, shape (n,)
        Evaluation points.
    knots : array-like, shape (m,)
        Interior knots (must be strictly increasing and strictly inside
        ``(min(x), max(x))``).
    xp : module, optional
        Array module (numpy, cupy, or torch). If None, uses numpy.

    Returns
    -------
    B : array, shape (n, m + 2)
        Natural cubic spline basis matrix: the ``m + 4`` cubic B-spline
        columns reduced by the two boundary constraints. The columns are
        linear combinations of B-splines; none of them is an explicit
        intercept column.

    Raises
    ------
    ValueError
        If ``knots`` is empty, or (from ``bspline_basis``) if an interior
        knot is not strictly inside the range of ``x``.
    """
    xp = _get_xp(xp)

    x = xp.asarray(x, dtype=xp.float64).ravel()
    knots = xp.asarray(knots, dtype=xp.float64).ravel()

    if knots.shape[0] < 1:
        raise ValueError("At least one interior knot is required for natural cubic splines")

    x_min = float(xp.min(x))
    x_max = float(xp.max(x))
    knot_min = float(xp.min(knots))
    knot_max = float(xp.max(knots))

    # Raises unless x_min < knot_min and knot_max < x_max, so both boundary
    # intervals used below have positive width.
    B_cubic = bspline_basis(x, knots, degree=3, xp=xp)

    # Second derivative of every basis function at each boundary.
    # Inside one knot interval each basis function is a single cubic, for
    # which the one-sided stencil (2, -5, 4, -1) / h^2 is exact. The step is
    # a quarter of the boundary interval, so all four stencil points stay
    # inside it and round-off is not amplified by a tiny step.
    h_lo = (knot_min - x_min) / 4.0
    h_hi = (x_max - knot_max) / 4.0
    stencil_pts = (
        [x_min + j * h_lo for j in range(4)]
        + [x_max - j * h_hi for j in range(4)]
    )
    x_eval = xp_asarray(stencil_pts, dtype=xp.float64, xp=xp, ref_arr=x)

    # Same boundary knots as B_cubic so the two bases share coefficients.
    B_eval = bspline_basis(x_eval, knots, degree=3, xp=xp,
                           boundary_lo=x_min, boundary_hi=x_max)

    d2_lo = (2.0 * B_eval[0:1, :] - 5.0 * B_eval[1:2, :]
             + 4.0 * B_eval[2:3, :] - B_eval[3:4, :]) / (h_lo ** 2)
    d2_hi = (2.0 * B_eval[4:5, :] - 5.0 * B_eval[5:6, :]
             + 4.0 * B_eval[6:7, :] - B_eval[7:8, :]) / (h_hi ** 2)

    # Constraint matrix C, shape (2, n_basis): C @ beta = 0.
    C = xp.vstack([d2_lo, d2_hi])

    # Null space of C. With the full SVD, Vh is (n_basis, n_basis) and its
    # rows past the numerical rank span the null space.
    try:
        U, S_vals, Vh = xp.linalg.svd(C)
        n_rank = int(xp.sum(S_vals > max(C.shape) * S_vals[0] * xp.finfo(xp.float64).eps))
        null_space = Vh[n_rank:].T  # shape: (n_basis, n_basis - n_rank)
    except _LINALG_ERRORS:
        # Fallback: complete QR of C.T. The leading C.shape[0] columns of Q
        # span the row space of C; the remaining columns are orthogonal to
        # it and therefore lie in the null space.
        Q_full, _ = xp.linalg.qr(C.T, mode='complete')
        null_space = Q_full[:, C.shape[0]:]

    # Restrict the cubic basis to coefficients satisfying the constraints.
    B_natural = B_cubic @ null_space

    return B_natural
252
+
253
+
254
def _bspline_basis_derivative(x, knots, degree=3, deriv_order=1, xp=None):
    """
    Evaluate a derivative of the B-spline basis.

    Applies the B-spline derivative recursion on the augmented knot vector
    ``t`` (each boundary repeated ``degree + 1`` times)::

        D^r B_{i,q}(x) = q / (t_{i+q}   - t_i)     * D^{r-1} B_{i,q-1}(x)
                       - q / (t_{i+q+1} - t_{i+1}) * D^{r-1} B_{i+1,q-1}(x)

    starting from the plain basis of degree ``degree - deriv_order`` and
    raising the degree by one per derivative order. Terms with a zero-width
    knot span are dropped.

    Parameters
    ----------
    x : array-like, shape (n,)
        Evaluation points.
    knots : array-like, shape (m,)
        Interior knots.
    degree : int, default=3
        Spline degree.
    deriv_order : int, default=1
        Order of derivative. Orders above ``degree`` give an all-zero
        matrix; order 0 returns the basis itself.
    xp : module, optional
        Array module.

    Returns
    -------
    dB : array, shape (n, m + degree + 1)
        Derivative of each basis function at each point, with columns
        aligned to those of ``bspline_basis(x, knots, degree)``.

    Raises
    ------
    ValueError
        If ``deriv_order`` is negative.
    """
    xp = _get_xp(xp)

    if deriv_order < 0:
        raise ValueError("deriv_order must be non-negative")

    if deriv_order > degree:
        return xp_zeros((len(x), len(knots) + degree + 1), xp.float64, xp, x)

    if deriv_order == 0:
        return bspline_basis(x, knots, degree=degree, xp=xp)

    x = xp_asarray(x, dtype=xp.float64, xp=xp).ravel()
    knots = xp_asarray(knots, dtype=xp.float64, xp=xp, ref_arr=x).ravel()

    # Same default boundary rule as bspline_basis.
    boundary_lo = min(float(xp.min(x)), float(xp.min(knots)))
    boundary_hi = max(float(xp.max(x)), float(xp.max(knots)))

    # Basis of the reduced degree. bspline_basis pads its own knot vector
    # with only (reduced_degree + 1) boundary copies.
    reduced_degree = degree - deriv_order
    B_reduced = bspline_basis(x, knots, degree=reduced_degree, xp=xp,
                              boundary_lo=boundary_lo, boundary_hi=boundary_hi)

    # Augmented knot vector for the target degree, as plain Python floats so
    # the scalar span computations below need no per-element array access.
    t = (
        [boundary_lo] * (degree + 1)
        + _to_numpy(knots).tolist()
        + [boundary_hi] * (degree + 1)
    )

    # Re-index the reduced basis onto t. The extra boundary copies add
    # `deriv_order` basis functions at each end whose support has zero
    # width, i.e. identically-zero columns.
    n_points = B_reduced.shape[0]
    n_low = B_reduced.shape[1]
    dB = xp_zeros((n_points, n_low + 2 * deriv_order), xp.float64, xp, x)
    dB[:, deriv_order:deriv_order + n_low] = B_reduced

    # Each pass raises the degree to q and the derivative order by one,
    # removing one column.
    for step in range(1, deriv_order + 1):
        q = reduced_degree + step
        n_next = dB.shape[1] - 1
        dB_next = xp_zeros((n_points, n_next), xp.float64, xp, x)

        for i in range(n_next):
            span_left = t[i + q] - t[i]
            span_right = t[i + q + 1] - t[i + 1]

            # A zero-width span means the matching lower-degree function is
            # identically zero, so its term is omitted.
            coef_left = q / span_left if span_left > 0 else 0.0
            coef_right = q / span_right if span_right > 0 else 0.0

            dB_next[:, i] = coef_left * dB[:, i] - coef_right * dB[:, i + 1]

        dB = dB_next

    return dB
@@ -0,0 +1,349 @@
1
+ """
2
+ Penalized least squares utilities for spline smoothing.
3
+
4
+ Provides functions for solving penalized regression problems and
5
+ constructing difference penalty matrices for spline smoothing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+
12
+ from statgpu.backends import _LINALG_ERRORS, _torch_dev, _to_numpy, xp_zeros, xp_eye, xp_asarray, xp_cholesky_solve
13
+
14
+
15
def _get_xp(xp):
    """Resolve the array namespace: ``xp`` itself, or NumPy when it is None."""
    if xp is None:
        return np
    return xp
18
+
19
+
20
def difference_penalty(order, n_coef, xp=None):
    """
    Build the difference penalty matrix ``S = D^T @ D``.

    ``D`` is the ``order``-th difference operator acting on the coefficient
    vector, so ``beta^T S beta`` is the sum of squared ``order``-th
    differences of adjacent coefficients.

    Parameters
    ----------
    order : int
        Difference order (>= 1). Order 1 rows are ``(-1, 1)``; order 2 rows
        are ``(1, -2, 1)``.
    n_coef : int
        Number of spline coefficients (basis functions); must exceed
        ``order``.
    xp : module, optional
        Array module (numpy, cupy, or torch). If None, uses numpy.

    Returns
    -------
    S : array, shape (n_coef, n_coef)
        Penalty matrix (positive semi-definite).

    Raises
    ------
    ValueError
        If ``order < 1`` or ``n_coef <= order``.
    """
    xp = _get_xp(xp)

    if order < 1:
        raise ValueError("Penalty order must be >= 1")
    if n_coef <= order:
        raise ValueError(
            f"n_coef ({n_coef}) must be greater than order ({order})"
        )

    # Start from the identity and difference its rows `order` times; each
    # pass drops one row, leaving D with shape (n_coef - order, n_coef).
    diff_op = xp_eye(n_coef, xp.float64, xp)
    for _ in range(order):
        shifted = diff_op[1:, :]
        unshifted = diff_op[:-1, :]
        diff_op = shifted - unshifted

    return diff_op.T @ diff_op
66
+
67
+
68
def penalized_ls(B, y, penalty_matrix, lambda_, xp=None):
    """
    Solve a penalized least squares problem.

    Minimizes ``||y - B @ beta||^2 + lambda_ * beta^T @ S @ beta`` through
    the normal equations ``(B^T B + lambda_ * S) beta = B^T y``.

    Parameters
    ----------
    B : array, shape (n, p)
        Basis (design) matrix.
    y : array, shape (n,) or (n, q)
        Response. A 1-D response is treated as a single column.
    penalty_matrix : array, shape (p, p)
        Penalty matrix S (positive semi-definite).
    lambda_ : float
        Smoothing parameter (expected non-negative; not checked).
    xp : module, optional
        Array module (numpy, cupy, or torch). If None, uses numpy.

    Returns
    -------
    beta : array, shape (p,) or (p, q)
        Fitted coefficients. Flattened to ``(p,)`` whenever the response has
        a single column, whether it was passed as ``(n,)`` or ``(n, 1)``.
    edf : scalar
        Effective degrees of freedom ``trace(A^{-1} B^T B)`` clipped to
        ``[0, p]``. Stays a backend scalar when the solve succeeds; it is the
        Python float ``p`` when that solve raises a linear-algebra error.

    Notes
    -----
    The primary solve is a Cholesky solve on ``A`` plus a small diagonal
    jitter. If it raises, ``xp.linalg.solve`` and then ``xp.linalg.lstsq``
    are tried on the unjittered ``A``. The edf computation uses the jittered
    matrix whenever it was formed, including when the coefficient solve
    fell back to the unjittered one.
    """
    xp = _get_xp(xp)

    B = xp_asarray(B, dtype=xp.float64, xp=xp)
    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)
    penalty_matrix = xp_asarray(penalty_matrix, dtype=xp.float64, xp=xp, ref_arr=B)

    if y.ndim == 1:
        y = y.reshape(-1, 1)

    _, p = B.shape

    # Normal-equation pieces.
    gram = B.T @ B
    rhs = B.T @ y
    system = gram + lambda_ * penalty_matrix

    # Matrix the edf solve will use; replaced by the jittered system as soon
    # as that has been built.
    edf_system = system
    try:
        # Jitter proportional to the mean diagonal entry.
        ridge = 1e-10 * xp.trace(system) / p
        stabilized = system + ridge * xp_eye(p, xp.float64, xp, system)
        edf_system = stabilized
        beta = xp_cholesky_solve(stabilized, rhs, xp)
    except _LINALG_ERRORS:
        try:
            # General solve on the unjittered system.
            beta = xp.linalg.solve(system, rhs)
        except _LINALG_ERRORS:
            # Last resort: least squares.
            beta = xp.linalg.lstsq(system, rhs, rcond=None)[0]

    # edf = tr(A^{-1} B^T B), clipped to [0, p] without leaving the device.
    upper = float(p)
    try:
        edf = xp.trace(xp.linalg.solve(edf_system, gram))
        if hasattr(edf, 'clamp'):  # torch
            edf = edf.clamp(0.0, upper)
        else:  # numpy/cupy
            edf = xp.clip(edf, 0.0, upper)
    except _LINALG_ERRORS:
        edf = upper

    # A single response column comes back as a flat vector.
    if y.shape[1] == 1:
        beta = beta.ravel()

    return beta, edf
145
+
146
+
147
def generalized_cross_validation(B, y, penalty_matrix, lambda_, xp=None):
    """
    Compute the Generalized Cross-Validation (GCV) score.

    ``GCV = n * RSS / (n - edf)^2``, evaluated as
    ``(RSS / n) / (1 - edf / n)^2``, where RSS is the residual sum of
    squares of the penalized fit and edf its effective degrees of freedom.

    Parameters
    ----------
    B : array, shape (n, p)
        Basis matrix.
    y : array, shape (n,) or (n, 1)
        Response vector. A single-column 2-D response is flattened.
    penalty_matrix : array, shape (p, p)
        Penalty matrix.
    lambda_ : float
        Smoothing parameter.
    xp : module, optional
        Array module.

    Returns
    -------
    gcv : scalar
        GCV score (lower is better); ``inf`` when ``1 - edf / n`` is not
        greater than ``1e-10``. Stays a backend scalar when edf is one.
    """
    xp = _get_xp(xp)

    B = xp_asarray(B, dtype=xp.float64, xp=xp)
    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)

    # penalized_ls returns a flat coefficient vector for a single-column
    # response, so B @ beta has shape (n,). Subtracting that from an (n, 1)
    # response would broadcast to (n, n) and silently corrupt the RSS.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.reshape(-1)

    beta, edf = penalized_ls(B, y, penalty_matrix, lambda_, xp)

    resid = y - B @ beta
    n = len(y)

    rss = xp.sum(resid ** 2)  # backend scalar, no host sync

    # Guard against a zero or negative denominator (edf >= n).
    denom = 1.0 - edf / n
    if hasattr(denom, 'item'):  # backend scalar (numpy, cupy, or torch)
        # torch.where needs a tensor operand on the same device.
        _inf = xp.tensor(float('inf'), dtype=denom.dtype, device=denom.device) if hasattr(xp, 'tensor') else float('inf')
        gcv = xp.where(denom > 1e-10, rss / n / (denom ** 2), _inf)
    else:
        # edf was the Python-float fallback from penalized_ls.
        gcv = rss / n / (denom ** 2) if denom > 1e-10 else float('inf')

    return gcv
196
+
197
+
198
def select_lambda_gcv(B, y, penalty_matrix, lambda_grid=None, xp=None):
    """
    Choose the smoothing parameter that minimizes the GCV score.

    Every value in ``lambda_grid`` is scored with
    ``generalized_cross_validation`` and the minimizer is returned.

    Parameters
    ----------
    B : array, shape (n, p)
        Basis matrix.
    y : array, shape (n,)
        Response vector.
    penalty_matrix : array, shape (p, p)
        Penalty matrix.
    lambda_grid : array-like, optional
        Candidate lambda values. Defaults to 100 log-spaced values from
        1e-10 to 1e10.
    xp : module, optional
        Array module.

    Returns
    -------
    best_lambda : float
        Grid value with the smallest GCV score.
    gcv_scores : array
        GCV score for each grid value, in grid order.
    """
    xp = _get_xp(xp)

    B = xp_asarray(B, dtype=xp.float64, xp=xp)
    y = xp_asarray(y, dtype=xp.float64, xp=xp, ref_arr=B)
    penalty_matrix = xp_asarray(penalty_matrix, dtype=xp.float64, xp=xp, ref_arr=B)

    if lambda_grid is None:
        lambda_grid = xp.logspace(-10, 10, 100)

    lambda_grid = xp_asarray(lambda_grid, dtype=xp.float64, xp=xp, ref_arr=B)

    # Score the grid without converting individual scores to host values;
    # the only host transfer is the stacked score vector below.
    scores = [
        generalized_cross_validation(B, y, penalty_matrix, lambda_grid[idx], xp)
        for idx in range(len(lambda_grid))
    ]

    scores_host = _to_numpy(xp.stack(scores))
    winner = int(np.argmin(scores_host))
    best_lambda = float(_to_numpy(lambda_grid)[winner])
    gcv_scores = xp_asarray(scores_host, dtype=xp.float64, xp=xp, ref_arr=B)

    return best_lambda, gcv_scores
253
+
254
+
255
def fit_penalized_spline(x, y, knots, degree=3, penalty_order=2,
                         lambda_=1.0, xp=None):
    """
    Fit a penalized B-spline to ``(x, y)``.

    Builds the B-spline basis for ``x``, a difference penalty of the
    requested order, and solves the penalized least squares problem.

    Parameters
    ----------
    x : array-like, shape (n,)
        Predictor variable.
    y : array-like, shape (n,)
        Response variable.
    knots : array-like, shape (m,)
        Interior knots.
    degree : int, default=3
        Spline degree.
    penalty_order : int, default=2
        Order of the difference penalty.
    lambda_ : float, default=1.0
        Smoothing parameter.
    xp : module, optional
        Array module.

    Returns
    -------
    beta : array, shape (n_basis,)
        Fitted spline coefficients.
    edf : float
        Effective degrees of freedom.
    B : array, shape (n, n_basis)
        Basis matrix.
    S : array, shape (n_basis, n_basis)
        Penalty matrix.
    """
    from statgpu.nonparametric.splines._bspline_basis import bspline_basis

    xp = _get_xp(xp)

    x = xp.asarray(x, dtype=xp.float64).ravel()
    y = xp.asarray(y, dtype=xp.float64).ravel()

    # Design matrix, then a penalty sized to its column count.
    basis = bspline_basis(x, knots, degree=degree, xp=xp)
    penalty = difference_penalty(penalty_order, basis.shape[1], xp)

    coef, edf = penalized_ls(basis, y, penalty, lambda_, xp)

    return coef, edf, basis, penalty
306
+
307
+
308
def predict_penalized_spline(x_new, beta, knots, degree=3, xp=None,
                             boundary_lo=None, boundary_hi=None):
    """
    Evaluate a fitted penalized spline at new points.

    Parameters
    ----------
    x_new : array-like, shape (n_new,)
        New predictor values.
    beta : array, shape (n_basis,)
        Fitted spline coefficients.
    knots : array-like, shape (m,)
        Interior knots used for fitting.
    degree : int, default=3
        Spline degree.
    xp : module, optional
        Array module.
    boundary_lo : float, optional
        Lower boundary knot from the training data. When omitted, the
        boundary is derived from ``x_new`` and ``knots`` instead.
    boundary_hi : float, optional
        Upper boundary knot from the training data. When omitted, the
        boundary is derived from ``x_new`` and ``knots`` instead.

    Returns
    -------
    y_pred : array, shape (n_new,)
        Predicted values.
    """
    from statgpu.nonparametric.splines._bspline_basis import bspline_basis

    xp = _get_xp(xp)

    x_new = xp.asarray(x_new, dtype=xp.float64).ravel()
    beta = xp.asarray(beta, dtype=xp.float64)

    # Basis at the new points; the boundaries are forwarded so the basis
    # can match the one used at fit time.
    basis_new = bspline_basis(
        x_new,
        knots,
        degree=degree,
        xp=xp,
        boundary_lo=boundary_lo,
        boundary_hi=boundary_hi,
    )

    return basis_new @ beta
@@ -0,0 +1,19 @@
1
+ """
2
+ Panel data models with GPU acceleration.
3
+
4
+ Provides fixed effects and random effects estimators for panel/longitudinal
5
+ data, along with clustered covariance estimators.
6
+ """
7
+
8
+ from ._fixed_effects import PanelOLS
9
+ from ._random_effects import RandomEffects
10
+ from ._covariance import clustered_covariance, two_way_clustered_covariance
11
+ from ._utils import PanelSummary
12
+
13
+ __all__ = [
14
+ 'PanelOLS',
15
+ 'RandomEffects',
16
+ 'PanelSummary',
17
+ 'clustered_covariance',
18
+ 'two_way_clustered_covariance',
19
+ ]