statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,194 @@
1
+ """GPU-accelerated one-way ANOVA.
2
+
3
+ Provides :func:`f_oneway`, a backend-agnostic replacement for
4
+ ``scipy.stats.f_oneway`` that can run on NumPy, CuPy, or PyTorch arrays.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ __all__ = ["f_oneway"]
10
+
11
+ from dataclasses import dataclass
12
+ from typing import Any, Tuple, Union
13
+
14
+ import numpy as np
15
+
16
+ from statgpu.backends import _get_xp, _resolve_backend, _to_float_scalar, _to_numpy
17
+
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Result container
21
+ # ---------------------------------------------------------------------------
22
+
23
@dataclass
class AnovaResult:
    """Container for the outputs of a one-way analysis of variance.

    Attributes
    ----------
    statistic : float
        F-statistic of the test.
    pvalue : float
        Survival-function value of the F-distribution at ``statistic``.
    df_between : int
        Between-group degrees of freedom, ``k - 1``.
    df_within : int
        Within-group degrees of freedom, ``N - k``.
    eta_squared : float
        Effect size, ``SSB / (SSB + SSW)``.
    """

    # Field order is part of the positional constructor signature.
    statistic: float  # F-statistic
    pvalue: float  # F-distribution survival function at the statistic
    df_between: int  # k - 1
    df_within: int  # N - k
    eta_squared: float  # SSB / (SSB + SSW)
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Core implementation
50
+ # ---------------------------------------------------------------------------
51
+
52
def _match_device(arr: Any, ref: Any) -> Any:
    """Return *arr* placed on the device of *ref* when both support it."""
    # Duck-typed check: only arrays exposing ``.to`` are moved, and only when
    # the reference exposes ``.device``; everything else is returned as-is.
    if hasattr(arr, "to") and hasattr(ref, "device"):
        return arr.to(device=ref.device)
    return arr


def f_oneway(
    *groups: Any,
    backend: str = "auto",
    dtype: Any = None,
) -> AnovaResult:
    """Perform a one-way ANOVA.

    Parameters
    ----------
    *groups : array-like
        Two or more sample arrays, one per group.  Each is converted to the
        compute dtype and flattened to 1-D before use.
    backend : {'auto', 'numpy', 'cupy', 'torch'}, default='auto'
        Compute backend.  The value is passed, together with the input
        arrays, to ``_resolve_backend``.
    dtype : dtype or None, default=None
        Float dtype for computation.  ``None`` uses the backend's
        ``float64``.

    Returns
    -------
    AnovaResult
        Dataclass with ``statistic``, ``pvalue``, ``df_between``,
        ``df_within``, and ``eta_squared``.  When the within-group sum of
        squares is exactly zero the result is ``nan``/``nan``/``nan`` if the
        between-group sum of squares is also zero, and
        ``inf``/``0.0``/``1.0`` otherwise.

    Raises
    ------
    ValueError
        If fewer than 2 groups are supplied, if any group is empty, or if
        the total number of observations does not exceed the number of
        groups (no within-group degrees of freedom).

    Examples
    --------
    >>> import numpy as np
    >>> from statgpu.anova import f_oneway
    >>> g1 = np.array([5.1, 4.9, 5.0])
    >>> g2 = np.array([6.2, 6.0, 6.3])
    >>> g3 = np.array([7.1, 7.3, 7.0])
    >>> result = f_oneway(g1, g2, g3)
    >>> round(result.statistic, 2)  # doctest: +SKIP
    181.24
    """
    if len(groups) < 2:
        raise ValueError("f_oneway requires at least 2 groups")

    # Resolve backend from input arrays
    resolved = _resolve_backend(backend, *groups)
    xp = _get_xp(resolved)

    # Resolve dtype
    float_dtype = dtype if dtype is not None else xp.float64

    # Convert groups to flat arrays in the target backend, remembering each
    # group's size as an exact Python int.
    flat_groups = []
    sizes = []
    for g in groups:
        arr = xp.asarray(g, dtype=float_dtype).ravel()
        n_i = int(arr.shape[0])
        if n_i < 1:
            raise ValueError("each group must contain at least 1 observation")
        flat_groups.append(arr)
        sizes.append(n_i)

    k = len(flat_groups)
    # Total sample size is summed on the host from Python ints.  Summing a
    # float array instead would lose exactness for low-precision dtypes
    # (float32 cannot represent every integer above 2**24) and would force
    # an extra device-to-host transfer.
    n_total = sum(sizes)

    if n_total <= k:
        raise ValueError(
            f"total observations ({n_total}) must exceed number of groups ({k})"
        )

    # Use first group as device reference for freshly created arrays
    ref = flat_groups[0]
    group_sizes = _match_device(xp.asarray(sizes, dtype=float_dtype), ref)

    # Group means, kept in the backend's array type
    group_means = _match_device(xp.empty(k, dtype=float_dtype), ref)
    for i, (g, n_i) in enumerate(zip(flat_groups, sizes)):
        group_means[i] = xp.sum(g) / n_i

    # Grand mean (weighted by group sizes)
    grand_mean = xp.sum(group_means * group_sizes) / n_total

    # SSB (between-group sum of squares)
    ssb = xp.sum(group_sizes * (group_means - grand_mean) ** 2)

    # SSW (within-group sum of squares), accumulated group by group
    ssw = _match_device(xp.zeros(1, dtype=float_dtype), ref)
    for i, g in enumerate(flat_groups):
        diff = g - group_means[i]
        ssw = ssw + xp.sum(diff * diff)

    # Convert the two sums of squares to Python floats
    ssb = _to_float_scalar(ssb)
    ssw = _to_float_scalar(ssw)

    df_between = k - 1
    df_within = n_total - k

    # Edge case: no within-group variance
    if ssw == 0.0:
        if ssb == 0.0:
            # All observations identical
            return AnovaResult(
                statistic=float("nan"),
                pvalue=float("nan"),
                df_between=df_between,
                df_within=df_within,
                eta_squared=float("nan"),
            )
        # Perfect separation
        return AnovaResult(
            statistic=float("inf"),
            pvalue=0.0,
            df_between=df_between,
            df_within=df_within,
            eta_squared=1.0,
        )

    ms_between = ssb / df_between
    ms_within = ssw / df_within
    f_stat = ms_between / ms_within

    eta_squared = ssb / (ssb + ssw)

    # P-value from F survival function via statgpu.inference (imported here,
    # at call time, rather than at module import)
    from statgpu.inference._distributions_backend import get_distribution

    f_dist = get_distribution("f", backend=resolved)
    pvalue_arr = f_dist.sf(f_stat, df_between, df_within)
    pvalue = _to_float_scalar(pvalue_arr)

    return AnovaResult(
        statistic=f_stat,
        pvalue=pvalue,
        df_between=df_between,
        df_within=df_within,
        eta_squared=eta_squared,
    )
@@ -0,0 +1,83 @@
1
+ """
2
+ statgpu.backends – pluggable compute backends for array operations.
3
+
4
+ Supported backends
5
+ ------------------
6
+ * **NumpyBackend** – CPU, always available.
7
+ * **CuPyBackend** – CUDA GPU via CuPy (install ``statgpu[gpu11]`` or
8
+ ``statgpu[gpu12]``).
9
+ * **TorchBackend** – CUDA GPU (or CPU) via PyTorch (install
10
+ ``statgpu[torch]``).
11
+
12
+ Quick start
13
+ -----------
14
+ >>> from statgpu.backends import get_backend
15
+ >>> backend = get_backend() # auto-detects best available backend
16
+ >>> xp = backend.xp # array module (numpy / cupy / torch)
17
+ >>> arr = backend.asarray([1, 2, 3])
18
+ >>> backend.to_numpy(arr)
19
+ array([1, 2, 3])
20
+
21
+ Use ``get_backend(backend='cupy')`` or ``get_backend(backend='torch')`` to
22
+ force a specific library.
23
+ """
24
+
25
+ from ._base import BackendBase, _is_cupy_array, _is_torch_array, _resolve_backend
26
+ from ._numpy import NumpyBackend
27
+ from ._cupy import CuPyBackend
28
+ from ._torch import TorchBackend
29
+ from ._factory import get_backend
30
+ from ._utils import (
31
+ _get_xp,
32
+ _to_numpy,
33
+ _to_float_scalar,
34
+ _get_torch_device_str,
35
+ _cupy_to_torch_dlpack,
36
+ _torch_to_cupy_dlpack,
37
+ _numpy_to_torch_tensor,
38
+ _move_torch_tensor,
39
+ _torch_dev,
40
+ _LINALG_ERRORS,
41
+ xp_zeros,
42
+ xp_eye,
43
+ xp_full,
44
+ xp_astype,
45
+ xp_asarray,
46
+ xp_empty,
47
+ xp_arange,
48
+ xp_ones,
49
+ xp_maximum,
50
+ xp_copy,
51
+ xp_cholesky_solve,
52
+ )
53
+
54
# Names exported by ``from statgpu.backends import *``.  Underscore-prefixed
# helpers are listed explicitly because a star import would otherwise skip
# them.  ``_LINALG_ERRORS`` is included so that every name imported from
# ``._utils`` above is exported consistently.
__all__ = [
    "BackendBase",
    "NumpyBackend",
    "CuPyBackend",
    "TorchBackend",
    "get_backend",
    "_is_cupy_array",
    "_is_torch_array",
    "_resolve_backend",
    "_get_xp",
    "_to_numpy",
    "_to_float_scalar",
    "_get_torch_device_str",
    "_cupy_to_torch_dlpack",
    "_torch_to_cupy_dlpack",
    "_numpy_to_torch_tensor",
    "_move_torch_tensor",
    "_torch_dev",
    "_LINALG_ERRORS",
    "xp_zeros",
    "xp_eye",
    "xp_full",
    "xp_astype",
    "xp_asarray",
    "xp_empty",
    "xp_arange",
    "xp_ones",
    "xp_maximum",
    "xp_copy",
    "xp_cholesky_solve",
]