statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,188 @@
1
+ """
2
+ Regression diagnostics for model validation.
3
+ Includes residual analysis, influence measures, and VIF.
4
+ """
5
+
6
+ import numpy as np
7
+ from scipy import stats
8
+
9
+
10
class RegressionDiagnostics:
    """
    Diagnostics for a fitted regression model.

    Provides raw/standardized/studentized residuals, leverage, Cook's
    distance, variance inflation factors and a printed summary.

    Parameters
    ----------
    model : fitted model
        Fitted regression model exposing non-None ``_resid``, ``_X_design``
        and ``_y`` attributes. The optional attributes ``_scale`` (used as
        the residual variance), ``fit_intercept`` and ``_params`` are used
        when present.

    Raises
    ------
    ValueError
        If a required attribute is missing or is None.
    """

    # Added to denominators so that a leverage of exactly 1 (or a constant
    # column in the VIF regression) does not cause a division by zero.
    _EPS = 1e-10

    def __init__(self, model):
        self.model = model
        self._validate_model()

    def _validate_model(self):
        """Raise ValueError if a required model attribute is missing or None."""
        for attr in ('_resid', '_X_design', '_y'):
            if getattr(self.model, attr, None) is None:
                raise ValueError(f"Model missing required attribute: {attr}")

    def _sigma(self):
        """Residual standard deviation.

        Uses ``sqrt(model._scale)`` when ``_scale`` is available; otherwise
        falls back to ``np.std`` of the raw residuals. A ``_scale`` of None
        is treated the same as a missing attribute instead of crashing.
        """
        scale = getattr(self.model, '_scale', None)
        if scale is None:
            return np.std(self.residuals)
        return np.sqrt(scale)

    def _studentize(self, h):
        """Internally studentized residuals for precomputed leverage ``h``."""
        return self.residuals / (self._sigma() * np.sqrt(1 - h + self._EPS))

    def _cooks_from_leverage(self, h):
        """Cook's distance for precomputed leverage ``h``."""
        p = self.model._X_design.shape[1]
        stud = self._studentize(h)
        return (stud**2 / p) * (h / (1 - h + self._EPS))

    def _n_params(self):
        """Number of fitted parameters.

        Prefers ``len(model._params)``; falls back to the number of design
        matrix columns because ``_params`` is not a validated attribute.
        """
        params = getattr(self.model, '_params', None)
        if params is not None:
            return len(params)
        return self.model._X_design.shape[1]

    @property
    def residuals(self):
        """Raw residuals."""
        return self.model._resid

    @property
    def fitted_values(self):
        """Fitted (predicted) values, computed as ``_y - _resid``."""
        return self.model._y - self.model._resid

    @property
    def standardized_residuals(self):
        """Standardized residuals (divided by estimated standard deviation)."""
        return self.residuals / self._sigma()

    @property
    def studentized_residuals(self):
        """Internally studentized residuals.

        Each residual is divided by ``sigma * sqrt(1 - h_i)``, where sigma
        is estimated from the full fit (no leave-one-out re-estimation).
        """
        return self._studentize(self.leverage)

    @property
    def leverage(self):
        """Leverage values (diagonal of hat matrix)."""
        X = self.model._X_design
        XtX = X.T @ X
        try:
            XtX_inv_Xt = np.linalg.solve(XtX, X.T)
        except np.linalg.LinAlgError:
            # Singular normal equations: use the pseudo-inverse instead.
            XtX_inv_Xt = np.linalg.pinv(XtX) @ X.T
        # Row-wise dot product gives diag(X @ (X'X)^-1 @ X') without
        # materializing the full n-by-n hat matrix.
        return np.einsum("ij,ij->i", X, XtX_inv_Xt.T)

    @property
    def cooks_distance(self):
        """Cook's distance (influence measure)."""
        return self._cooks_from_leverage(self.leverage)

    def vif(self):
        """
        Variance Inflation Factor (multicollinearity measure).

        Each feature column is regressed on all remaining design columns
        and ``1 / (1 - R^2)`` is reported.

        Returns
        -------
        vif : ndarray
            VIF for each feature. The first design column is skipped when
            ``model.fit_intercept`` is truthy; if the model has no
            ``fit_intercept`` attribute every column is treated as a
            feature.
        """
        X = self.model._X_design
        n_features = X.shape[1]

        # Skip intercept
        start_idx = 1 if getattr(self.model, 'fit_intercept', False) else 0

        vif_values = []
        for i in range(start_idx, n_features):
            # Regress feature i on all other features
            y_vif = X[:, i]
            X_vif = np.delete(X, i, axis=1)

            try:
                coef = np.linalg.lstsq(X_vif, y_vif, rcond=None)[0]
            except np.linalg.LinAlgError:
                # Only a failed least-squares solve maps to an infinite VIF;
                # anything else (e.g. KeyboardInterrupt) must propagate.
                vif_values.append(np.inf)
                continue

            ss_res = np.sum((y_vif - X_vif @ coef)**2)
            ss_tot = np.sum((y_vif - np.mean(y_vif))**2)
            r2 = 1 - ss_res / (ss_tot + self._EPS)
            vif_values.append(1 / (1 - r2 + self._EPS))

        return np.array(vif_values)

    def summary(self):
        """Print diagnostic summary to standard output."""
        print("=" * 60)
        print("Regression Diagnostics Summary")
        print("=" * 60)

        # Residuals
        print("\n--- Residuals ---")
        resid = self.residuals
        print(f"Min: {np.min(resid):10.4f}")
        print(f"Q1: {np.percentile(resid, 25):10.4f}")
        print(f"Median: {np.median(resid):10.4f}")
        print(f"Q3: {np.percentile(resid, 75):10.4f}")
        print(f"Max: {np.max(resid):10.4f}")

        # Normality test (capped at the first 5000 residuals)
        if len(resid) >= 3:
            _, shapiro_p = stats.shapiro(resid[:5000])
            print(f"\nShapiro-Wilk normality test p-value: {shapiro_p:.4f}")
            if shapiro_p < 0.05:
                print("⚠ Residuals may not be normally distributed")
            else:
                print("✓ Residuals appear normally distributed")
        else:
            # Shapiro-Wilk is undefined for fewer than 3 observations.
            print("\nShapiro-Wilk normality test skipped (fewer than 3 residuals)")

        # Leverage (computed once and reused for Cook's distance below)
        h = self.leverage
        h_threshold = 2 * self._n_params() / len(h)
        high_leverage = np.sum(h > h_threshold)
        print("\n--- Leverage ---")
        print(f"Mean leverage: {np.mean(h):.4f}")
        print(f"Max leverage: {np.max(h):.4f}")
        print(f"High leverage points (>{h_threshold:.4f}): {high_leverage}")

        # Cook's distance
        cooks = self._cooks_from_leverage(h)
        influential = np.sum(cooks > 1)
        print("\n--- Cook's Distance ---")
        print(f"Mean: {np.mean(cooks):.4f}")
        print(f"Max: {np.max(cooks):.4f}")
        print(f"Influential points (>1): {influential}")

        # VIF
        print("\n--- Variance Inflation Factor ---")
        vif_values = self.vif()
        for i, v in enumerate(vif_values, start=1):
            status = "⚠" if v > 10 else "✓"
            print(f" x{i}: {v:.2f} {status}")

        if np.any(vif_values > 10):
            print("\n⚠ High multicollinearity detected (VIF > 10)")
        elif np.any(vif_values > 5):
            print("\n⚠ Moderate multicollinearity (VIF > 5)")
        else:
            print("\n✓ No significant multicollinearity")

        print("=" * 60)
170
+
171
+
172
def diagnose_model(model):
    """
    Build diagnostics for a fitted model and print their summary.

    Parameters
    ----------
    model : fitted model
        Fitted regression model, passed straight to
        ``RegressionDiagnostics``.

    Returns
    -------
    diagnostics : RegressionDiagnostics
        The diagnostics object whose summary was printed.
    """
    diagnostics = RegressionDiagnostics(model)
    diagnostics.summary()
    return diagnostics
@@ -0,0 +1,24 @@
1
+ """
2
+ Feature selection methods for statgpu.
3
+ """
4
+
5
+ from ._knockoff import (
6
+ FixedXKnockoffSelector,
7
+ KnockoffResult,
8
+ KnockoffSelector,
9
+ fixed_x_knockoff_filter,
10
+ knockoff_filter,
11
+ model_x_knockoff_filter,
12
+ )
13
+ from ._stepwise import StepwiseSelector, stepwise_selection
14
+
15
+ __all__ = [
16
+ "StepwiseSelector",
17
+ "stepwise_selection",
18
+ "KnockoffResult",
19
+ "knockoff_filter",
20
+ "fixed_x_knockoff_filter",
21
+ "model_x_knockoff_filter",
22
+ "KnockoffSelector",
23
+ "FixedXKnockoffSelector",
24
+ ]