statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,52 @@
1
+ """Ordered logistic regression (proportional odds model)."""
2
+
3
+ import numpy as np
4
+
5
+ from statgpu._config import Device
6
+ from statgpu.glm_core._family import Binomial, LogitLink
7
+ from statgpu.linear_model._glm_base import OrderedGeneralizedLinearModel
8
+
9
+
10
class OrderedLogitRegression(OrderedGeneralizedLinearModel):
    """Ordered logit regression (proportional odds model) with GPU support.

    Thin specialisation of ``OrderedGeneralizedLinearModel``: the constructor
    forwards its arguments to the base class together with
    ``family="binomial"`` and ``solver="auto"``, and ``_get_family`` supplies
    a binomial family with a logit link.

    Parameters
    ----------
    n_categories : int, default=3
        Number of ordinal categories.
    fit_intercept : bool, default=True
    max_iter : int, default=100
    tol : float, default=1e-4
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
    n_jobs : int or None, default=None
        Forwarded unchanged to the base class.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to the base class.
    """

    def __init__(
        self,
        n_categories: int = 3,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: int = None,
        gpu_memory_cleanup: bool = False,
    ):
        # Settings pinned by this subclass, followed by the caller's options.
        base_options = dict(
            family="binomial",
            solver="auto",
            n_categories=n_categories,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            C=C,
            device=device,
            n_jobs=n_jobs,
            gpu_memory_cleanup=gpu_memory_cleanup,
        )
        super().__init__(**base_options)

    def _get_family(self):
        """Return the binomial family paired with a logit link."""
        link = LogitLink()
        return Binomial(link=link)
@@ -0,0 +1,50 @@
1
+ """Ordered probit regression."""
2
+
3
+ import numpy as np
4
+
5
+ from statgpu._config import Device
6
+ from statgpu.glm_core._family import Binomial, ProbitLink
7
+ from statgpu.linear_model._glm_base import OrderedGeneralizedLinearModel
8
+
9
+
10
class OrderedProbitRegression(OrderedGeneralizedLinearModel):
    """Ordered probit regression.

    Thin specialisation of ``OrderedGeneralizedLinearModel``: the constructor
    forwards its arguments to the base class together with
    ``family="binomial"`` and ``solver="auto"``, and ``_get_family`` supplies
    a binomial family with a probit link.

    Parameters
    ----------
    n_categories : int, default=3
        Number of ordinal categories.
    fit_intercept : bool, default=True
    max_iter : int, default=100
    tol : float, default=1e-4
    C : float, default=1.0
        Inverse regularization strength.
    device : str or Device, default='auto'
    n_jobs : int or None, default=None
        Forwarded unchanged to the base class.
    gpu_memory_cleanup : bool, default=False
        Forwarded unchanged to the base class.
    """

    def __init__(
        self,
        n_categories: int = 3,
        fit_intercept: bool = True,
        max_iter: int = 100,
        tol: float = 1e-4,
        C: float = 1.0,
        device: Device = Device.AUTO,
        n_jobs: int = None,
        gpu_memory_cleanup: bool = False,
    ):
        # Settings pinned by this subclass, followed by the caller's options.
        base_options = dict(
            family="binomial",
            solver="auto",
            n_categories=n_categories,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            C=C,
            device=device,
            n_jobs=n_jobs,
            gpu_memory_cleanup=gpu_memory_cleanup,
        )
        super().__init__(**base_options)

    def _get_family(self):
        """Return the binomial family paired with a probit link."""
        link = ProbitLink()
        return Binomial(link=link)
@@ -0,0 +1,170 @@
1
+ """
2
+ Statistical inference for linear models.
3
+ Computes standard errors, t-statistics, p-values, etc.
4
+ """
5
+
6
+ import numpy as np
7
+ from statgpu.inference import t as t_dist, f as f_dist
8
+
9
+
10
class _CallableIntervals(np.ndarray):
    """Interval array that can also be called like a method.

    Indexing yields the stored bounds; calling the array with ``alpha``
    delegates to the function supplied at construction time. This lets
    ``RegressionResults.conf_int`` serve both attribute-style and
    method-style callers.
    """

    def __new__(cls, values, recompute):
        obj = np.asarray(values).view(cls)
        obj._recompute = recompute
        return obj

    def __array_finalize__(self, obj):
        # Slices and arithmetic results are plain data: they do not inherit
        # the recompute hook (``__new__`` sets it on the top-level view only).
        self._recompute = None

    def __call__(self, alpha=0.05):
        recompute = getattr(self, "_recompute", None)
        if recompute is None:
            raise TypeError("derived interval arrays are not callable")
        return recompute(alpha)


class RegressionResults:
    """
    Results class for linear regression with statistical inference.

    Similar to statsmodels RegressionResultsWrapper.

    Attributes
    ----------
    bse : ndarray
        Standard errors of the parameters.
    tvalues : ndarray
        ``params / bse``.
    pvalues : ndarray
        Two-sided p-values of the t-statistics.
    conf_int : ndarray, also callable
        95% confidence intervals, one ``[lower, upper]`` row per parameter.
        ``conf_int(alpha)`` returns the intervals for another level.
    """

    def __init__(self, model, params, resid, scale, nobs, df_resid):
        """
        Initialize results object.

        Parameters
        ----------
        model : fitted model instance
            Must expose ``_X_design`` and ``_y``; ``_feature_names`` is
            optional and only used by ``summary``.
        params : ndarray
            Estimated parameters (including intercept if fitted)
        resid : ndarray
            Residuals
        scale : float
            Estimate of error variance (sigma^2)
        nobs : int
            Number of observations
        df_resid : int
            Degrees of freedom of residuals
        """
        self.model = model
        self.params = params
        self.resid = resid
        self.scale = scale
        self.nobs = nobs
        self.df_resid = df_resid

        # Compute standard errors and statistics
        self._compute_inference()

    def _compute_inference(self):
        """Compute standard errors, t-stats, p-values, confidence intervals."""
        X = self.model._X_design

        # (X'X)^-1, falling back to the pseudo-inverse when inversion fails.
        gram = X.T @ X
        try:
            gram_inv = np.linalg.inv(gram)
        except np.linalg.LinAlgError:
            gram_inv = np.linalg.pinv(gram)

        # Standard errors: sqrt(scale * diag((X'X)^-1))
        self.bse = np.sqrt(self.scale * np.diag(gram_inv))

        # t-statistics: coef / std_err
        self.tvalues = self.params / self.bse

        # p-values: two-tailed t-test
        self.pvalues = 2 * (1 - t_dist.cdf(np.abs(self.tvalues), df=self.df_resid))

        # Default (95%) confidence intervals; stored through the property
        # setter so the ``conf_int`` name stays usable as a method too.
        self.conf_int = self._conf_int_at(0.05)

    def _conf_int_at(self, alpha):
        """Return ``params -/+ t_crit * bse`` for a two-sided level ``1 - alpha``."""
        t_crit = float(t_dist.ppf(1 - alpha / 2, df=self.df_resid))
        margin = t_crit * self.bse
        return np.column_stack([self.params - margin, self.params + margin])

    @property
    def conf_int(self):
        """Confidence intervals for parameters.

        The returned array holds the default 95% intervals, so existing
        ``results.conf_int[i, 0]`` access keeps working. It is also callable:
        ``results.conf_int(alpha=0.1)`` returns the intervals for another
        level. Previously the instance attribute shadowed the method, so any
        call raised ``TypeError``.
        """
        return _CallableIntervals(self._conf_int_default, self._conf_int_at)

    @conf_int.setter
    def conf_int(self, value):
        # Keeps ``self.conf_int = array`` assignments working.
        self._conf_int_default = np.asarray(value)

    def _sums_of_squares(self):
        """Return ``(ss_tot, ss_res)``: total (about the mean) and residual."""
        y = self.model._y
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        ss_res = np.sum(self.resid ** 2)
        return ss_tot, ss_res

    @property
    def rsquared(self):
        """R-squared (0.0 when the response has no variation)."""
        ss_tot, ss_res = self._sums_of_squares()
        return 1 - ss_res / ss_tot if ss_tot > 0 else 0.0

    @property
    def rsquared_adj(self):
        """Adjusted R-squared (NaN when ``n - k - 1`` is zero)."""
        n = self.nobs
        k = len(self.params) - 1  # exclude intercept from count
        denom = n - k - 1
        if denom == 0:
            # Undefined without residual degrees of freedom; avoid a
            # ZeroDivisionError escaping from summary().
            return np.nan
        return 1 - (1 - self.rsquared) * (n - 1) / denom

    @property
    def fvalue(self):
        """F-statistic for overall model significance.

        Returns ``inf`` when there are no slope parameters or the residual
        sum of squares is not positive.
        """
        ss_tot, ss_res = self._sums_of_squares()
        ss_reg = ss_tot - ss_res

        k = len(self.params) - 1
        if k == 0 or ss_res <= 0:
            return np.inf

        return (ss_reg / k) / (ss_res / self.df_resid)

    @property
    def f_pvalue(self):
        """p-value for F-test (1.0 when there are no slope parameters)."""
        k = len(self.params) - 1
        if k == 0:
            return 1.0
        return 1 - float(f_dist.cdf(self.fvalue, dfn=k, dfd=self.df_resid))

    @property
    def aic(self):
        """Akaike Information Criterion, computed as ``n*log(scale) + 2k``.

        NOTE(review): this omits the constant ``n*(log(2*pi) + 1)``, so it is
        not equal to ``-2*llf + 2k``; differences between models fitted on
        the same data are unaffected.
        """
        n = self.nobs
        k = len(self.params)
        return n * np.log(self.scale) + 2 * k

    @property
    def bic(self):
        """Bayesian Information Criterion, computed as ``n*log(scale) + k*log(n)``.

        NOTE(review): uses the same constant-free convention as ``aic``.
        """
        n = self.nobs
        k = len(self.params)
        return n * np.log(self.scale) + k * np.log(n)

    def summary(self):
        """Print summary table similar to R's summary(lm())."""
        # Use the model's names when present; otherwise generate them.
        feature_names = getattr(self.model, '_feature_names', None)
        if feature_names is None:
            feature_names = ['(Intercept)'] + [f'x{i}' for i in range(len(self.params) - 1)]

        bounds = np.asarray(self.conf_int)

        # Build summary table
        print("=" * 80)
        print("Linear Regression Results")
        print("=" * 80)
        print(f"No. Observations: {self.nobs:>15}")
        print(f"Degrees of Freedom: {self.df_resid:>15}")
        print(f"R-squared: {self.rsquared:>15.4f}")
        print(f"Adj. R-squared: {self.rsquared_adj:>15.4f}")
        print(f"F-statistic: {self.fvalue:>15.4f}")
        print(f"Prob (F-statistic): {self.f_pvalue:>15.4e}")
        print(f"Log-Likelihood: {self.llf:>15.4f}")
        print(f"AIC: {self.aic:>15.4f}")
        print(f"BIC: {self.bic:>15.4f}")
        print("-" * 80)
        print(f"{'':<20} {'coef':>12} {'std err':>12} {'t':>10} {'P>|t|':>10} {'[0.025':>12} {'0.975]':>12}")
        print("-" * 80)

        for i, name in enumerate(feature_names):
            print(f"{name:<20} {self.params[i]:>12.4f} {self.bse[i]:>12.4f} "
                  f"{self.tvalues[i]:>10.3f} {self.pvalues[i]:>10.4f} "
                  f"{bounds[i, 0]:>12.4f} {bounds[i, 1]:>12.4f}")

        print("=" * 80)

    @property
    def llf(self):
        """Log-likelihood, evaluated as ``-n/2 * (log(2*pi*scale) + 1)``."""
        n = self.nobs
        return -n/2 * (np.log(2 * np.pi * self.scale) + 1)
@@ -0,0 +1,13 @@
1
+ """Cross-validated model wrappers (LassoCV, RidgeCV, ElasticNetCV, LogisticRegressionCV)."""
2
+
3
+ from ._lasso_cv import LassoCV
4
+ from ._ridge_cv import RidgeCV
5
+ from ._elasticnet_cv import ElasticNetCV
6
+ from ._logistic_cv import LogisticRegressionCV
7
+
8
# Public names re-exported by this package.
__all__ = ["LassoCV", "RidgeCV", "ElasticNetCV", "LogisticRegressionCV"]