statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. statgpu/__init__.py +174 -0
  2. statgpu/_base.py +544 -0
  3. statgpu/_config.py +127 -0
  4. statgpu/anova/__init__.py +5 -0
  5. statgpu/anova/_oneway.py +194 -0
  6. statgpu/backends/__init__.py +83 -0
  7. statgpu/backends/_array_ops.py +529 -0
  8. statgpu/backends/_base.py +184 -0
  9. statgpu/backends/_cupy.py +453 -0
  10. statgpu/backends/_factory.py +65 -0
  11. statgpu/backends/_gpu_inference_cupy.py +214 -0
  12. statgpu/backends/_gpu_inference_torch.py +422 -0
  13. statgpu/backends/_numpy.py +324 -0
  14. statgpu/backends/_torch.py +685 -0
  15. statgpu/backends/_torch_safe.py +47 -0
  16. statgpu/backends/_utils.py +423 -0
  17. statgpu/core/__init__.py +10 -0
  18. statgpu/core/formula/__init__.py +33 -0
  19. statgpu/core/formula/_design.py +99 -0
  20. statgpu/core/formula/_parser.py +191 -0
  21. statgpu/core/formula/_terms.py +70 -0
  22. statgpu/core/formula/tests/__init__.py +0 -0
  23. statgpu/core/formula/tests/test_parser.py +194 -0
  24. statgpu/covariance/__init__.py +6 -0
  25. statgpu/covariance/_empirical.py +310 -0
  26. statgpu/covariance/_shrinkage.py +248 -0
  27. statgpu/cross_validation/__init__.py +31 -0
  28. statgpu/cross_validation/_base.py +410 -0
  29. statgpu/cross_validation/_engine.py +167 -0
  30. statgpu/diagnostics/__init__.py +7 -0
  31. statgpu/diagnostics/_regression_diagnostics.py +188 -0
  32. statgpu/feature_selection/__init__.py +24 -0
  33. statgpu/feature_selection/_knockoff.py +870 -0
  34. statgpu/feature_selection/_knockoff_utils.py +1003 -0
  35. statgpu/feature_selection/_stepwise.py +300 -0
  36. statgpu/glm_core/__init__.py +81 -0
  37. statgpu/glm_core/_base.py +202 -0
  38. statgpu/glm_core/_family.py +362 -0
  39. statgpu/glm_core/_fused.py +149 -0
  40. statgpu/glm_core/_gamma.py +111 -0
  41. statgpu/glm_core/_inverse_gaussian.py +62 -0
  42. statgpu/glm_core/_irls.py +561 -0
  43. statgpu/glm_core/_logistic.py +82 -0
  44. statgpu/glm_core/_negative_binomial.py +68 -0
  45. statgpu/glm_core/_poisson.py +60 -0
  46. statgpu/glm_core/_solver_legacy.py +100 -0
  47. statgpu/glm_core/_squared.py +53 -0
  48. statgpu/glm_core/_tweedie.py +74 -0
  49. statgpu/inference/__init__.py +239 -0
  50. statgpu/inference/_distributions_backend.py +2610 -0
  51. statgpu/inference/_multiple_testing.py +391 -0
  52. statgpu/inference/_resampling.py +1400 -0
  53. statgpu/inference/_results.py +265 -0
  54. statgpu/linear_model/__init__.py +75 -0
  55. statgpu/linear_model/_gaussian_inference.py +306 -0
  56. statgpu/linear_model/_glm_base.py +1261 -0
  57. statgpu/linear_model/_ordered_logit.py +52 -0
  58. statgpu/linear_model/_ordered_probit.py +50 -0
  59. statgpu/linear_model/_stats.py +170 -0
  60. statgpu/linear_model/cv/__init__.py +13 -0
  61. statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
  62. statgpu/linear_model/cv/_lasso_cv.py +253 -0
  63. statgpu/linear_model/cv/_logistic_cv.py +895 -0
  64. statgpu/linear_model/cv/_ridge_cv.py +1160 -0
  65. statgpu/linear_model/legacy/__init__.py +1 -0
  66. statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
  67. statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
  68. statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
  69. statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
  70. statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
  71. statgpu/linear_model/legacy/_solver_legacy.py +104 -0
  72. statgpu/linear_model/penalized/__init__.py +25 -0
  73. statgpu/linear_model/penalized/_base.py +437 -0
  74. statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
  75. statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
  76. statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
  77. statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
  78. statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
  79. statgpu/linear_model/penalized/_penalized_linear.py +236 -0
  80. statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
  81. statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
  82. statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
  83. statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
  84. statgpu/linear_model/penalized/_predict_mixin.py +182 -0
  85. statgpu/linear_model/wrappers/__init__.py +31 -0
  86. statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
  87. statgpu/linear_model/wrappers/_elasticnet.py +75 -0
  88. statgpu/linear_model/wrappers/_gamma.py +67 -0
  89. statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
  90. statgpu/linear_model/wrappers/_lasso.py +2124 -0
  91. statgpu/linear_model/wrappers/_linear.py +1127 -0
  92. statgpu/linear_model/wrappers/_logistic.py +1435 -0
  93. statgpu/linear_model/wrappers/_mcp.py +58 -0
  94. statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
  95. statgpu/linear_model/wrappers/_poisson.py +48 -0
  96. statgpu/linear_model/wrappers/_ridge.py +166 -0
  97. statgpu/linear_model/wrappers/_scad.py +58 -0
  98. statgpu/linear_model/wrappers/_tweedie.py +57 -0
  99. statgpu/metrics/__init__.py +21 -0
  100. statgpu/metrics/_classification.py +591 -0
  101. statgpu/nonparametric/__init__.py +50 -0
  102. statgpu/nonparametric/kernel_methods/__init__.py +25 -0
  103. statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
  104. statgpu/nonparametric/kernel_methods/_krr.py +234 -0
  105. statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
  106. statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
  107. statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
  108. statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
  109. statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
  110. statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
  111. statgpu/nonparametric/splines/__init__.py +5 -0
  112. statgpu/nonparametric/splines/_bspline_basis.py +336 -0
  113. statgpu/nonparametric/splines/_penalized.py +349 -0
  114. statgpu/panel/__init__.py +19 -0
  115. statgpu/panel/_covariance.py +140 -0
  116. statgpu/panel/_fixed_effects.py +420 -0
  117. statgpu/panel/_random_effects.py +385 -0
  118. statgpu/panel/_utils.py +482 -0
  119. statgpu/penalties/__init__.py +139 -0
  120. statgpu/penalties/_adaptive_l1.py +313 -0
  121. statgpu/penalties/_base.py +261 -0
  122. statgpu/penalties/_categories.py +39 -0
  123. statgpu/penalties/_elasticnet.py +98 -0
  124. statgpu/penalties/_group_lasso.py +678 -0
  125. statgpu/penalties/_group_mcp.py +553 -0
  126. statgpu/penalties/_group_scad.py +605 -0
  127. statgpu/penalties/_l1.py +107 -0
  128. statgpu/penalties/_l2.py +77 -0
  129. statgpu/penalties/_mcp.py +237 -0
  130. statgpu/penalties/_scad.py +260 -0
  131. statgpu/semiparametric/__init__.py +5 -0
  132. statgpu/semiparametric/_gam.py +401 -0
  133. statgpu/solvers/__init__.py +24 -0
  134. statgpu/solvers/_admm.py +241 -0
  135. statgpu/solvers/_constants.py +15 -0
  136. statgpu/solvers/_convergence.py +6 -0
  137. statgpu/solvers/_fista.py +436 -0
  138. statgpu/solvers/_fista_bb.py +513 -0
  139. statgpu/solvers/_fista_lla.py +541 -0
  140. statgpu/solvers/_lbfgs.py +206 -0
  141. statgpu/solvers/_newton.py +149 -0
  142. statgpu/solvers/_utils.py +277 -0
  143. statgpu/survival/__init__.py +14 -0
  144. statgpu/survival/_cox.py +3974 -0
  145. statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
  146. statgpu/survival/_cox_cv.py +1159 -0
  147. statgpu/survival/_cox_efron_cuda.py +1280 -0
  148. statgpu/survival/_cox_efron_triton.py +359 -0
  149. statgpu/unsupervised/__init__.py +29 -0
  150. statgpu/unsupervised/_agglomerative.py +307 -0
  151. statgpu/unsupervised/_dbscan.py +263 -0
  152. statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
  153. statgpu/unsupervised/_gmm.py +332 -0
  154. statgpu/unsupervised/_incremental_pca.py +176 -0
  155. statgpu/unsupervised/_kmeans.py +261 -0
  156. statgpu/unsupervised/_minibatch_kmeans.py +299 -0
  157. statgpu/unsupervised/_minibatch_nmf.py +252 -0
  158. statgpu/unsupervised/_nmf.py +190 -0
  159. statgpu/unsupervised/_pca.py +189 -0
  160. statgpu/unsupervised/_truncated_svd.py +132 -0
  161. statgpu/unsupervised/_tsne.py +192 -0
  162. statgpu/unsupervised/_umap.py +224 -0
  163. statgpu/unsupervised/_utils.py +134 -0
  164. statgpu-0.1.0.dist-info/METADATA +245 -0
  165. statgpu-0.1.0.dist-info/RECORD +168 -0
  166. statgpu-0.1.0.dist-info/WHEEL +5 -0
  167. statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
  168. statgpu-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,191 @@
1
+ """
2
+ FormulaParser – R-style formula parser wrapping patsy.
3
+
4
+ Provides the ``FormulaParser`` class that converts R-style formulas like
5
+ ``"y ~ x1 + x2 + C(sex)"`` into design matrices, using `patsy` internally.
6
+ """
7
+
8
+ from typing import Optional, Tuple, List, Any
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+
14
class FormulaParser:
    """R-style formula parser that builds design matrices via patsy.

    Parameters
    ----------
    formula : str
        R-style formula string, e.g. ``"y ~ x1 + x2 + C(sex)"``.

    Attributes
    ----------
    formula : str
        The original formula string.
    design_info : patsy.DesignInfo or None
        Design matrix metadata (column names, term definitions).
        Set after :meth:`eval` is called.
    column_names : list[str] or None
        Names of the predictor design-matrix columns. The response is
        excluded; the ``Intercept`` column is listed unless the formula
        removes it (e.g. with ``- 1``). Set after :meth:`eval` is called.

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> df = pd.DataFrame({
    ...     "y": np.random.randn(100),
    ...     "x1": np.random.randn(100),
    ...     "x2": np.random.randn(100),
    ... })
    >>> parser = FormulaParser("y ~ x1 + x2")
    >>> y, X, info = parser.eval(df)
    >>> parser.column_names
    ['Intercept', 'x1', 'x2']
    """

    def __init__(self, formula: str):
        self.formula = formula
        # Both stay None until eval() has been run on training data.
        self._design_info = None
        self._y_names: Optional[List[str]] = None

    @property
    def design_info(self):
        """Design matrix metadata, available after :meth:`eval`."""
        return self._design_info

    @property
    def column_names(self) -> Optional[List[str]]:
        """Predictor column names, available after :meth:`eval`.

        Returns ``None`` while the parser has not been evaluated; otherwise
        a fresh list copied from ``design_info.column_names``.
        """
        if self._design_info is None:
            return None
        return list(self._design_info.column_names)

    def _require_patsy(self):
        """Return the patsy module or raise ImportError with guidance.

        patsy is imported lazily so this class can be constructed without
        the optional dependency installed.

        Raises
        ------
        ImportError
            If patsy cannot be imported; the original import failure is
            attached as the cause.
        """
        try:
            import patsy
        except ImportError as exc:
            raise ImportError(
                "The 'patsy' package is required for formula-based model fitting. "
                "Install it with: pip install statgpu[formula] "
                "or: pip install patsy"
            ) from exc
        return patsy

    def eval(
        self,
        data: pd.DataFrame,
        eval_env: int = 0,
    ) -> Tuple[np.ndarray, np.ndarray, Any]:
        """Parse formula and build design matrices from a DataFrame.

        Parameters
        ----------
        data : pd.DataFrame
            DataFrame containing the columns referenced in the formula.
        eval_env : int, default=0
            Evaluation frame depth for patsy name resolution.

        Returns
        -------
        y : ndarray of shape (n_obs,) or (n_obs, n_responses)
            Response variable(s). A single-column response is flattened
            to one dimension.
        X : ndarray of shape (n_obs, n_predictors)
            Predictor design matrix.
        design_info : patsy.DesignInfo
            Metadata for the predictor design (column names, term info).

        Raises
        ------
        ImportError
            If patsy is not installed.
        """
        patsy = self._require_patsy()
        # NOTE(review): presumably a defensive copy so formula evaluation
        # never touches the caller's frame - confirm it is still needed.
        data = data.copy()

        # +1 shifts the depth past this method's own frame, so that
        # ``eval_env=0`` refers to the caller of eval().
        y, X = patsy.dmatrices(
            self.formula,
            data,
            eval_env=eval_env + 1,
            return_type="matrix",
        )

        # Remember the response names and predictor design for
        # summary() and transform().
        self._y_names = list(y.design_info.column_names)
        self._design_info = X.design_info

        y_arr = np.asarray(y)
        if y_arr.ndim == 2 and y_arr.shape[1] == 1:
            y_arr = y_arr.ravel()
        X_arr = np.asarray(X)

        return y_arr, X_arr, X.design_info

    def transform(
        self,
        new_data: pd.DataFrame,
        eval_env: int = 0,
    ) -> np.ndarray:
        """Build a design matrix for new data using the stored design_info.

        Used at prediction time to ensure new data is encoded with the
        same column structure (including categorical coding) as the
        training data.

        Parameters
        ----------
        new_data : pd.DataFrame
            DataFrame with the same columns as the training data.
        eval_env : int, default=0
            Currently unused: it is not forwarded to
            ``patsy.build_design_matrices``.

        Returns
        -------
        X_new : ndarray of shape (n_new_obs, n_predictors)
            Design matrix aligned with the training design.

        Raises
        ------
        RuntimeError
            If :meth:`eval` has not been called yet (no design_info available).
        ImportError
            If patsy is not installed.

        Notes
        -----
        Errors raised by patsy when ``new_data`` does not match the training
        structure propagate unchanged.
        NOTE(review): earlier docs promised ``ValueError`` here; nothing in
        this method converts patsy's exceptions, so confirm the actual type.
        """
        if self._design_info is None:
            raise RuntimeError(
                "Cannot transform: no design_info available. "
                "Call eval() first on training data."
            )

        patsy = self._require_patsy()

        # build_design_matrices returns one matrix per design_info passed in.
        X_new = patsy.build_design_matrices(
            [self._design_info],
            new_data,
            return_type="matrix",
        )[0]

        return np.asarray(X_new)

    def summary(self) -> str:
        """Return a human-readable summary of the formula parsing.

        Shows the formula string and, once evaluated, the response
        variables, predictor names, and term names (useful for debugging
        categorical encoding).
        """
        lines = [f"Formula: {self.formula}"]

        if self._design_info is None:
            lines.append("(Not yet evaluated. Call eval() to parse.)")
            return "\n".join(lines)

        # The property builds a new list on every access; read it once.
        names = self.column_names
        lines.append(f"Response: {self._y_names}")
        lines.append(f"Predictors ({len(names)}):")
        lines.extend(f" - {name}" for name in names)

        lines.append("\nTerms:")
        lines.extend(f" {term}" for term in self._design_info.term_name_slices)

        return "\n".join(lines)

    def __repr__(self) -> str:
        evaluated = "evaluated" if self._design_info is not None else "pending"
        return f"FormulaParser({self.formula!r}, {evaluated})"
@@ -0,0 +1,70 @@
1
+ """
2
+ Formula term helpers and custom evaluation environments.
3
+
4
+ Patsy natively supports R-style formula terms:
5
+
6
+ - ``C(var)`` — treat as categorical (treatment/dummy coding by default)
7
+ - ``np.func(var)`` — apply numpy function (e.g. ``np.log(x)``)
8
+ - ``x1:x2`` — interaction only
9
+ - ``x1*x2`` — main effects + interaction
10
+ - ``x1 + x2`` — additive
11
+ - ``x1 + x2 - 1`` — additive without intercept
12
+ - ``np.log(x)`` — transformations
13
+
14
+ This module provides helper functions for constructing custom
15
+ patsy evaluation environments, needed for model-specific syntax
16
+ like ``Surv(time, event)`` in Cox PH models.
17
+ """
18
+
19
+ from typing import Dict, Any, Optional
20
+
21
+ import numpy as np
22
+
23
+
24
def _surv(time, event):
    """Pack survival times and event indicators into a two-column array.

    Stands in for R's ``survival::Surv()`` inside patsy formulas such as::

        "Surv(time, event) ~ x1 + x2"

    Parameters
    ----------
    time : array-like
        Survival/follow-up times.
    event : array-like
        Event indicator (1 = event occurred, 0 = censored).

    Returns
    -------
    result : ndarray of shape (n, 2)
        Column 0 holds ``time``, column 1 holds ``event``; both are
        converted to float64 and flattened first.

    Raises
    ------
    ValueError
        If the flattened inputs differ in length.
    """
    time = np.asarray(time, dtype=np.float64).ravel()
    event = np.asarray(event, dtype=np.float64).ravel()

    n_time, n_event = len(time), len(event)
    if n_time == n_event:
        return np.column_stack([time, event])

    raise ValueError(
        f"time ({n_time} elements) and event ({n_event} elements) "
        "must have the same length."
    )


def make_surv_env() -> Dict[str, Any]:
    """Build the name-to-function mapping that exposes ``Surv`` to patsy.

    Returns
    -------
    env : dict
        Custom functions for patsy's ``EvalEnvironment``; contains the
        single key ``"Surv"``.

    Examples
    --------
    >>> from statgpu.core.formula._terms import make_surv_env
    >>> import patsy
    >>> env = make_surv_env()
    >>> # Then pass env to patsy.dmatrices or dmatrix
    """
    surv_env: Dict[str, Any] = {}
    surv_env["Surv"] = _surv
    return surv_env
File without changes
@@ -0,0 +1,194 @@
1
+ """
2
+ Tests for statgpu.core.formula module.
3
+ """
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import pytest
8
+
9
+
10
@pytest.fixture
def sample_df():
    """Seeded 200-row frame with a response, two numeric predictors and a factor."""
    np.random.seed(42)
    n_rows = 200

    # Draw in the order y, x1, x2, cat so the seeded stream yields the
    # same values as building the columns inline.
    response = np.random.randn(n_rows)
    first = np.random.randn(n_rows)
    second = np.random.randn(n_rows)
    levels = np.random.choice(["A", "B", "C"], n_rows)

    columns = {
        "y": response,
        "x1": first,
        "x2": second,
        "cat": pd.Categorical(levels),
    }
    return pd.DataFrame(columns)
21
+
22
+
23
class TestFormulaParserBasic:
    """Formula parsing through ``FormulaParser.eval``."""

    def test_simple_formula(self, sample_df):
        """``y ~ x1 + x2`` yields a flat response and three design columns."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + x2")
        response, design, _ = fp.eval(sample_df)

        assert response.shape == (200,)
        # Intercept, x1 and x2.
        assert design.shape == (200, 3)
        assert fp.column_names == ["Intercept", "x1", "x2"]

    def test_no_intercept(self, sample_df):
        """``- 1`` drops the intercept column."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + x2 - 1")
        _, design, _ = fp.eval(sample_df)

        assert design.shape == (200, 2)
        assert fp.column_names == ["x1", "x2"]

    def test_categorical_encoding(self, sample_df):
        """``C()`` expands a three-level factor into two indicator columns."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + C(cat)")
        _, design, _ = fp.eval(sample_df)
        names = fp.column_names

        # Intercept + x1 + cat[T.B] + cat[T.C] = 4 columns
        assert design.shape[1] == 4
        assert "x1" in names
        assert any("cat" in label for label in names)

    def test_interaction(self, sample_df):
        """An explicit ``x1:x2`` term adds one interaction column."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + x2 + x1:x2")
        _, design, _ = fp.eval(sample_df)

        # Intercept, x1, x2 and x1:x2.
        assert design.shape[1] == 4

    def test_star_operator(self, sample_df):
        """``x1 * x2`` expands to main effects plus the interaction."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 * x2")
        _, design, _ = fp.eval(sample_df)

        # Intercept, x1, x2 and x1:x2.
        assert design.shape[1] == 4

    def test_transform(self, sample_df):
        """Nested numpy calls inside the formula are evaluated as one column."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ np.log(np.abs(x1)) + x2")
        response, design, _ = fp.eval(sample_df)

        assert response.shape == (200,)
        # Intercept, the transformed x1 and x2.
        assert design.shape[1] == 3
86
+
87
+
88
class TestFormulaParserTransform:
    """Prediction-time encoding through ``FormulaParser.transform``."""

    def test_transform_new_data(self, sample_df):
        """New rows with the training columns get the training layout."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + x2")
        fp.eval(sample_df)

        fresh = pd.DataFrame({"x1": [0.5, -0.3], "x2": [1.2, 0.8]})
        encoded = fp.transform(fresh)

        # Two rows; intercept plus the two predictors.
        assert encoded.shape == (2, 3)

    def test_transform_with_categorical(self, sample_df):
        """A single-level frame is still expanded to the training columns."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + C(cat)")
        fp.eval(sample_df)

        fresh = pd.DataFrame({"x1": [0.5], "cat": pd.Categorical(["A"])})
        encoded = fp.transform(fresh)

        # Intercept, x1 and the two indicator columns for B and C.
        assert encoded.shape == (1, 4)

    def test_transform_no_design_info(self):
        """Calling transform before eval raises RuntimeError."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1")
        fresh = pd.DataFrame({"x1": [1.0]})

        with pytest.raises(RuntimeError, match="no design_info available"):
            fp.transform(fresh)
130
+
131
+
132
class TestParseFormulaSafe:
    """Formula-versus-array dispatch in ``parse_formula_safe``."""

    def test_formula_path(self, sample_df):
        """A formula plus a DataFrame returns a response and design info."""
        from statgpu.core.formula import parse_formula_safe

        response, _, meta = parse_formula_safe("y ~ x1", data=sample_df)

        assert response.shape == (200,)
        assert meta is not None

    def test_array_path(self, sample_df):
        """With no formula the arrays pass through and no info is returned."""
        from statgpu.core.formula import parse_formula_safe

        features = sample_df[["x1", "x2"]].values
        target = sample_df["y"].values
        target_out, features_out, meta = parse_formula_safe(
            None, None, X=features, y=target
        )

        assert meta is None
        np.testing.assert_array_equal(target_out, target)
        np.testing.assert_array_equal(features_out, features)

    def test_formula_without_data_raises(self):
        """A formula without data is rejected with ValueError."""
        from statgpu.core.formula import parse_formula_safe

        with pytest.raises(ValueError, match="data"):
            parse_formula_safe("y ~ x1", None)

    def test_no_input_raises(self):
        """Supplying neither a formula nor arrays is rejected with ValueError."""
        from statgpu.core.formula import parse_formula_safe

        with pytest.raises(ValueError, match="Either formula"):
            parse_formula_safe(None, None)
168
+
169
+
170
class TestFormulaParserSummary:
    """Text produced by ``FormulaParser.summary()``."""

    def test_summary_before_eval(self, sample_df):
        """Before eval the summary echoes the formula and flags it as unparsed."""
        from statgpu.core.formula import FormulaParser

        text = FormulaParser("y ~ x1 + x2").summary()

        assert "y ~ x1 + x2" in text
        assert "pending" in text.lower() or "Not yet evaluated" in text

    def test_summary_after_eval(self, sample_df):
        """After eval the summary lists every predictor and their count."""
        from statgpu.core.formula import FormulaParser

        fp = FormulaParser("y ~ x1 + x2")
        fp.eval(sample_df)
        text = fp.summary()

        for fragment in ("y ~ x1 + x2", "x1", "x2", "Predictors (3)"):
            assert fragment in text
@@ -0,0 +1,6 @@
1
+ """Covariance estimation with GPU acceleration."""
2
+
3
+ from ._empirical import EmpiricalCovariance
4
+ from ._shrinkage import LedoitWolf, OAS
5
+
6
+ __all__ = ["EmpiricalCovariance", "LedoitWolf", "OAS"]