statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Regression diagnostics for model validation.
|
|
3
|
+
Includes residual analysis, influence measures, and VIF.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
from scipy import stats
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RegressionDiagnostics:
    """
    Diagnostics for regression models.

    Computes residual summaries, leverage, Cook's distance and variance
    inflation factors from the arrays stored on a fitted model.

    Parameters
    ----------
    model : fitted model
        Fitted regression model exposing non-None ``_resid``, ``_X_design``
        and ``_y`` attributes. If ``_scale`` is present and not None, its
        square root is used as the residual standard deviation; otherwise
        ``np.std`` of the residuals is used. ``fit_intercept`` is read by
        :meth:`vif`, and ``_params`` is used by :meth:`summary` when present.

    Raises
    ------
    ValueError
        If a required attribute is missing or is None.
    """

    def __init__(self, model):
        self.model = model
        self._validate_model()

    def _validate_model(self):
        """Check model has required attributes."""
        for attr in ('_resid', '_X_design', '_y'):
            if getattr(self.model, attr, None) is None:
                raise ValueError(f"Model missing required attribute: {attr}")

    def _residual_sigma(self):
        """Return the residual standard deviation used for scaling."""
        scale = getattr(self.model, '_scale', None)
        if scale is not None:
            return np.sqrt(scale)
        # No usable scale on the model: fall back to the sample standard
        # deviation of the residuals (np.std default, ddof=0).
        return np.std(self.residuals)

    def _internally_studentized(self, h):
        """Return residuals divided by ``sigma * sqrt(1 - h)``."""
        # The 1e-10 keeps the denominator positive when a leverage is 1.
        return self.residuals / (self._residual_sigma() * np.sqrt(1 - h + 1e-10))

    @property
    def residuals(self):
        """Raw residuals."""
        return self.model._resid

    @property
    def fitted_values(self):
        """Fitted (predicted) values, recovered as ``y - residuals``."""
        return self.model._y - self.model._resid

    @property
    def standardized_residuals(self):
        """Standardized residuals (divided by estimated standard deviation)."""
        return self.residuals / self._residual_sigma()

    @property
    def studentized_residuals(self):
        """
        Internally studentized residuals.

        Each residual is divided by ``sigma * sqrt(1 - h_i)``, where ``sigma``
        is the full-sample residual standard deviation and ``h_i`` is the
        leverage. No leave-one-out variance estimate is used.
        """
        return self._internally_studentized(self.leverage)

    @property
    def leverage(self):
        """Leverage values (diagonal of hat matrix)."""
        X = self.model._X_design
        XtX = X.T @ X
        try:
            XtX_inv_Xt = np.linalg.solve(XtX, X.T)
        except np.linalg.LinAlgError:
            # Singular normal equations: use the pseudo-inverse instead.
            XtX_inv_Xt = np.linalg.pinv(XtX) @ X.T
        # Row-wise dot products give diag(X @ (X'X)^-1 @ X') without
        # forming the full n x n hat matrix.
        return np.einsum("ij,ji->i", X, XtX_inv_Xt)

    @property
    def cooks_distance(self):
        """Cook's distance (influence measure)."""
        h = self.leverage  # computed once and reused below
        stud = self._internally_studentized(h)
        p = self.model._X_design.shape[1]

        # Cook's D
        return (stud**2 / p) * (h / (1 - h + 1e-10))

    def vif(self):
        """
        Variance Inflation Factor (multicollinearity measure).

        Each feature column is regressed on all remaining design columns and
        the VIF is ``1 / (1 - R^2)`` of that regression, with small constants
        added to the denominators to avoid division by zero.

        Returns
        -------
        vif : ndarray
            VIF for each feature. The first design column is skipped when
            ``model.fit_intercept`` is truthy.
        """
        X = self.model._X_design
        n_features = X.shape[1]

        # Skip intercept
        start_idx = 1 if self.model.fit_intercept else 0

        vif_values = []
        for i in range(start_idx, n_features):
            # Regress feature i on all other features
            y_vif = X[:, i]
            X_vif = np.delete(X, i, axis=1)

            try:
                coef = np.linalg.lstsq(X_vif, y_vif, rcond=None)[0]
            except np.linalg.LinAlgError:
                # Least-squares solver failed to converge: report the
                # feature as infinitely inflated rather than aborting.
                vif_values.append(np.inf)
                continue

            ss_res = np.sum((y_vif - X_vif @ coef)**2)
            ss_tot = np.sum((y_vif - np.mean(y_vif))**2)
            r2 = 1 - ss_res / (ss_tot + 1e-10)
            vif_values.append(1 / (1 - r2 + 1e-10))

        return np.array(vif_values)

    def summary(self):
        """Print diagnostic summary."""
        print("=" * 60)
        print("Regression Diagnostics Summary")
        print("=" * 60)

        # Residuals
        print("\n--- Residuals ---")
        resid = self.residuals
        print(f"Min: {np.min(resid):10.4f}")
        print(f"Q1: {np.percentile(resid, 25):10.4f}")
        print(f"Median: {np.median(resid):10.4f}")
        print(f"Q3: {np.percentile(resid, 75):10.4f}")
        print(f"Max: {np.max(resid):10.4f}")

        # Normality test (only the first 5000 residuals are passed on)
        _, shapiro_p = stats.shapiro(resid[:5000])
        print(f"\nShapiro-Wilk normality test p-value: {shapiro_p:.4f}")
        if shapiro_p < 0.05:
            print("⚠ Residuals may not be normally distributed")
        else:
            print("✓ Residuals appear normally distributed")

        # Leverage
        h = self.leverage
        # Prefer the model's parameter vector; fall back to the number of
        # design columns because _params is not a validated attribute.
        params = getattr(self.model, '_params', None)
        if params is not None:
            n_params = len(params)
        else:
            n_params = self.model._X_design.shape[1]
        h_threshold = 2 * n_params / len(h)
        high_leverage = np.sum(h > h_threshold)
        print("\n--- Leverage ---")
        print(f"Mean leverage: {np.mean(h):.4f}")
        print(f"Max leverage: {np.max(h):.4f}")
        print(f"High leverage points (>{h_threshold:.4f}): {high_leverage}")

        # Cook's distance
        cooks = self.cooks_distance
        influential = np.sum(cooks > 1)
        print("\n--- Cook's Distance ---")
        print(f"Mean: {np.mean(cooks):.4f}")
        print(f"Max: {np.max(cooks):.4f}")
        print(f"Influential points (>1): {influential}")

        # VIF
        print("\n--- Variance Inflation Factor ---")
        vif_values = self.vif()
        for i, v in enumerate(vif_values):
            status = "⚠" if v > 10 else "✓"
            print(f" x{i+1}: {v:.2f} {status}")

        if np.any(vif_values > 10):
            print("\n⚠ High multicollinearity detected (VIF > 10)")
        elif np.any(vif_values > 5):
            print("\n⚠ Moderate multicollinearity (VIF > 5)")
        else:
            print("\n✓ No significant multicollinearity")

        print("=" * 60)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def diagnose_model(model):
    """
    Build diagnostics for a fitted model and print their summary.

    Parameters
    ----------
    model : fitted model
        Fitted regression model, passed unchanged to
        ``RegressionDiagnostics``.

    Returns
    -------
    diagnostics : RegressionDiagnostics
        The diagnostics object whose summary was printed.
    """
    report = RegressionDiagnostics(model)
    # Printing is a side effect; the object is returned for further use.
    report.summary()
    return report
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Feature selection methods for statgpu.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from ._knockoff import (
|
|
6
|
+
FixedXKnockoffSelector,
|
|
7
|
+
KnockoffResult,
|
|
8
|
+
KnockoffSelector,
|
|
9
|
+
fixed_x_knockoff_filter,
|
|
10
|
+
knockoff_filter,
|
|
11
|
+
model_x_knockoff_filter,
|
|
12
|
+
)
|
|
13
|
+
from ._stepwise import StepwiseSelector, stepwise_selection
|
|
14
|
+
|
|
15
|
+
# Public names re-exported by this package (order preserved).
__all__ = [
    # Imported from ._stepwise
    "StepwiseSelector", "stepwise_selection",
    # Imported from ._knockoff
    "KnockoffResult",
    "knockoff_filter", "fixed_x_knockoff_filter", "model_x_knockoff_filter",
    "KnockoffSelector", "FixedXKnockoffSelector",
]
|