statgpu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statgpu/__init__.py +174 -0
- statgpu/_base.py +544 -0
- statgpu/_config.py +127 -0
- statgpu/anova/__init__.py +5 -0
- statgpu/anova/_oneway.py +194 -0
- statgpu/backends/__init__.py +83 -0
- statgpu/backends/_array_ops.py +529 -0
- statgpu/backends/_base.py +184 -0
- statgpu/backends/_cupy.py +453 -0
- statgpu/backends/_factory.py +65 -0
- statgpu/backends/_gpu_inference_cupy.py +214 -0
- statgpu/backends/_gpu_inference_torch.py +422 -0
- statgpu/backends/_numpy.py +324 -0
- statgpu/backends/_torch.py +685 -0
- statgpu/backends/_torch_safe.py +47 -0
- statgpu/backends/_utils.py +423 -0
- statgpu/core/__init__.py +10 -0
- statgpu/core/formula/__init__.py +33 -0
- statgpu/core/formula/_design.py +99 -0
- statgpu/core/formula/_parser.py +191 -0
- statgpu/core/formula/_terms.py +70 -0
- statgpu/core/formula/tests/__init__.py +0 -0
- statgpu/core/formula/tests/test_parser.py +194 -0
- statgpu/covariance/__init__.py +6 -0
- statgpu/covariance/_empirical.py +310 -0
- statgpu/covariance/_shrinkage.py +248 -0
- statgpu/cross_validation/__init__.py +31 -0
- statgpu/cross_validation/_base.py +410 -0
- statgpu/cross_validation/_engine.py +167 -0
- statgpu/diagnostics/__init__.py +7 -0
- statgpu/diagnostics/_regression_diagnostics.py +188 -0
- statgpu/feature_selection/__init__.py +24 -0
- statgpu/feature_selection/_knockoff.py +870 -0
- statgpu/feature_selection/_knockoff_utils.py +1003 -0
- statgpu/feature_selection/_stepwise.py +300 -0
- statgpu/glm_core/__init__.py +81 -0
- statgpu/glm_core/_base.py +202 -0
- statgpu/glm_core/_family.py +362 -0
- statgpu/glm_core/_fused.py +149 -0
- statgpu/glm_core/_gamma.py +111 -0
- statgpu/glm_core/_inverse_gaussian.py +62 -0
- statgpu/glm_core/_irls.py +561 -0
- statgpu/glm_core/_logistic.py +82 -0
- statgpu/glm_core/_negative_binomial.py +68 -0
- statgpu/glm_core/_poisson.py +60 -0
- statgpu/glm_core/_solver_legacy.py +100 -0
- statgpu/glm_core/_squared.py +53 -0
- statgpu/glm_core/_tweedie.py +74 -0
- statgpu/inference/__init__.py +239 -0
- statgpu/inference/_distributions_backend.py +2610 -0
- statgpu/inference/_multiple_testing.py +391 -0
- statgpu/inference/_resampling.py +1400 -0
- statgpu/inference/_results.py +265 -0
- statgpu/linear_model/__init__.py +75 -0
- statgpu/linear_model/_gaussian_inference.py +306 -0
- statgpu/linear_model/_glm_base.py +1261 -0
- statgpu/linear_model/_ordered_logit.py +52 -0
- statgpu/linear_model/_ordered_probit.py +50 -0
- statgpu/linear_model/_stats.py +170 -0
- statgpu/linear_model/cv/__init__.py +13 -0
- statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
- statgpu/linear_model/cv/_lasso_cv.py +253 -0
- statgpu/linear_model/cv/_logistic_cv.py +895 -0
- statgpu/linear_model/cv/_ridge_cv.py +1160 -0
- statgpu/linear_model/legacy/__init__.py +1 -0
- statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
- statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
- statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
- statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
- statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
- statgpu/linear_model/legacy/_solver_legacy.py +104 -0
- statgpu/linear_model/penalized/__init__.py +25 -0
- statgpu/linear_model/penalized/_base.py +437 -0
- statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
- statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
- statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
- statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
- statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
- statgpu/linear_model/penalized/_penalized_linear.py +236 -0
- statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
- statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
- statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
- statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
- statgpu/linear_model/penalized/_predict_mixin.py +182 -0
- statgpu/linear_model/wrappers/__init__.py +31 -0
- statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
- statgpu/linear_model/wrappers/_elasticnet.py +75 -0
- statgpu/linear_model/wrappers/_gamma.py +67 -0
- statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
- statgpu/linear_model/wrappers/_lasso.py +2124 -0
- statgpu/linear_model/wrappers/_linear.py +1127 -0
- statgpu/linear_model/wrappers/_logistic.py +1435 -0
- statgpu/linear_model/wrappers/_mcp.py +58 -0
- statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
- statgpu/linear_model/wrappers/_poisson.py +48 -0
- statgpu/linear_model/wrappers/_ridge.py +166 -0
- statgpu/linear_model/wrappers/_scad.py +58 -0
- statgpu/linear_model/wrappers/_tweedie.py +57 -0
- statgpu/metrics/__init__.py +21 -0
- statgpu/metrics/_classification.py +591 -0
- statgpu/nonparametric/__init__.py +50 -0
- statgpu/nonparametric/kernel_methods/__init__.py +25 -0
- statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
- statgpu/nonparametric/kernel_methods/_krr.py +234 -0
- statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
- statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
- statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
- statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
- statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
- statgpu/nonparametric/splines/__init__.py +5 -0
- statgpu/nonparametric/splines/_bspline_basis.py +336 -0
- statgpu/nonparametric/splines/_penalized.py +349 -0
- statgpu/panel/__init__.py +19 -0
- statgpu/panel/_covariance.py +140 -0
- statgpu/panel/_fixed_effects.py +420 -0
- statgpu/panel/_random_effects.py +385 -0
- statgpu/panel/_utils.py +482 -0
- statgpu/penalties/__init__.py +139 -0
- statgpu/penalties/_adaptive_l1.py +313 -0
- statgpu/penalties/_base.py +261 -0
- statgpu/penalties/_categories.py +39 -0
- statgpu/penalties/_elasticnet.py +98 -0
- statgpu/penalties/_group_lasso.py +678 -0
- statgpu/penalties/_group_mcp.py +553 -0
- statgpu/penalties/_group_scad.py +605 -0
- statgpu/penalties/_l1.py +107 -0
- statgpu/penalties/_l2.py +77 -0
- statgpu/penalties/_mcp.py +237 -0
- statgpu/penalties/_scad.py +260 -0
- statgpu/semiparametric/__init__.py +5 -0
- statgpu/semiparametric/_gam.py +401 -0
- statgpu/solvers/__init__.py +24 -0
- statgpu/solvers/_admm.py +241 -0
- statgpu/solvers/_constants.py +15 -0
- statgpu/solvers/_convergence.py +6 -0
- statgpu/solvers/_fista.py +436 -0
- statgpu/solvers/_fista_bb.py +513 -0
- statgpu/solvers/_fista_lla.py +541 -0
- statgpu/solvers/_lbfgs.py +206 -0
- statgpu/solvers/_newton.py +149 -0
- statgpu/solvers/_utils.py +277 -0
- statgpu/survival/__init__.py +14 -0
- statgpu/survival/_cox.py +3974 -0
- statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
- statgpu/survival/_cox_cv.py +1159 -0
- statgpu/survival/_cox_efron_cuda.py +1280 -0
- statgpu/survival/_cox_efron_triton.py +359 -0
- statgpu/unsupervised/__init__.py +29 -0
- statgpu/unsupervised/_agglomerative.py +307 -0
- statgpu/unsupervised/_dbscan.py +263 -0
- statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
- statgpu/unsupervised/_gmm.py +332 -0
- statgpu/unsupervised/_incremental_pca.py +176 -0
- statgpu/unsupervised/_kmeans.py +261 -0
- statgpu/unsupervised/_minibatch_kmeans.py +299 -0
- statgpu/unsupervised/_minibatch_nmf.py +252 -0
- statgpu/unsupervised/_nmf.py +190 -0
- statgpu/unsupervised/_pca.py +189 -0
- statgpu/unsupervised/_truncated_svd.py +132 -0
- statgpu/unsupervised/_tsne.py +192 -0
- statgpu/unsupervised/_umap.py +224 -0
- statgpu/unsupervised/_utils.py +134 -0
- statgpu-0.1.0.dist-info/METADATA +245 -0
- statgpu-0.1.0.dist-info/RECORD +168 -0
- statgpu-0.1.0.dist-info/WHEEL +5 -0
- statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
- statgpu-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Penalized Gamma regression wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from statgpu._config import Device
|
|
7
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PenalizedGammaRegression(PenalizedGeneralizedLinearModel):
    """Gamma-family penalized GLM (log or inverse-power link).

    Convenience subclass equivalent to
    ``PenalizedGeneralizedLinearModel(loss="gamma", ...)``.

    Parameters
    ----------
    penalty : str or Penalty, default='l2'
        Penalty type: 'l1', 'l2', 'elasticnet', 'scad', 'mcp', etc.
    alpha : float, default=1.0
        Regularization strength.
    l1_ratio : float, default=0.5
        ElasticNet mixing parameter (only used with penalty='elasticnet').
    link : str, default='log'
        Link function: 'log' or 'inverse_power'.  It is stored under the
        ``"link"`` key of the loss options unless ``loss_kwargs`` already
        provides that key, in which case the ``loss_kwargs`` value wins.
    loss_kwargs : dict, optional
        Additional keyword arguments for the loss constructor.  The mapping
        is copied before ``"link"`` is added, so the caller's dict is left
        untouched.

    All remaining arguments (``penalty_kwargs``, ``fit_intercept``,
    ``max_iter``, ``tol``, ``device``, ``n_jobs``, ``cpu_solver``, ``solver``,
    ``lipschitz_L``, ``gpu_memory_cleanup``, ``compute_inference``,
    ``inference_method``, ``cov_type``, ``hac_maxlags``, ``stopping``,
    ``lla``, ``max_lla_iters``, ``lla_tol``) are handed to the base-class
    constructor unchanged.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        link: str = "log",
        loss_kwargs: Optional[dict] = None,
    ):
        # Work on a private copy; an explicit "link" entry supplied through
        # loss_kwargs takes precedence over the ``link`` argument.
        gamma_loss_options = {} if not loss_kwargs else dict(loss_kwargs)
        if "link" not in gamma_loss_options:
            gamma_loss_options["link"] = link

        # Everything else is passed straight through to the generic GLM.
        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
        }
        super().__init__(
            loss="gamma",
            loss_kwargs=gamma_loss_options,
            **forwarded,
        )
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Penalized Inverse Gaussian regression wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from statgpu._config import Device
|
|
7
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PenalizedInverseGaussianRegression(PenalizedGeneralizedLinearModel):
    """Inverse-Gaussian penalized GLM.

    Convenience subclass that fixes ``loss="inverse_gaussian"`` and hands
    every constructor argument to ``PenalizedGeneralizedLinearModel``
    unchanged.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        loss_kwargs: Optional[dict] = None,
    ):
        # Pure pass-through: only the loss name is decided here.
        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
            "loss_kwargs": loss_kwargs,
        }
        super().__init__(loss="inverse_gaussian", **forwarded)
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""PenalizedLinearRegression — thin wrapper over PenalizedGeneralizedLinearModel."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import stats
|
|
9
|
+
|
|
10
|
+
from statgpu._config import Device
|
|
11
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PenalizedLinearRegression(PenalizedGeneralizedLinearModel):
    """Gaussian penalized regression.

    This typed estimator replaces the old ``PenalizedLinearRegression(loss=...)``
    entry point. Use ``PenalizedLogisticRegression`` or
    ``PenalizedPoissonRegression`` for non-gaussian GLMs.

    The constructor fixes ``loss="squared_error"`` and forwards every other
    argument to ``PenalizedGeneralizedLinearModel`` unchanged.  On top of the
    base class this subclass adds goodness-of-fit properties (``rsquared``,
    ``rsquared_adj``, ``fvalue``, ``f_pvalue``, ``llf``, ``aic``, ``bic``) and
    a printed ``summary()``.  The properties read fitted state
    (``_y``, ``_resid``, ``_nobs``, ``_df_resid``, ``_scale``, ``_params``)
    that is not set in this class.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l1",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        loss_kwargs: Optional[dict] = None,
    ):
        super().__init__(
            loss="squared_error",
            penalty=penalty,
            alpha=alpha,
            l1_ratio=l1_ratio,
            penalty_kwargs=penalty_kwargs,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            device=device,
            n_jobs=n_jobs,
            cpu_solver=cpu_solver,
            solver=solver,
            lipschitz_L=lipschitz_L,
            gpu_memory_cleanup=gpu_memory_cleanup,
            compute_inference=compute_inference,
            inference_method=inference_method,
            cov_type=cov_type,
            hac_maxlags=hac_maxlags,
            stopping=stopping,
            lla=lla,
            max_lla_iters=max_lla_iters,
            lla_tol=lla_tol,
            loss_kwargs=loss_kwargs,
        )

    @property
    def rsquared(self):
        """Coefficient of determination ``1 - SS_res / SS_tot``.

        Returns ``None`` when the response or residuals are not stored, and
        ``0.0`` when the total sum of squares is not positive.
        """
        if self._y is None or self._resid is None:
            return None
        y_mean = np.mean(self._y)
        ss_tot = np.sum((self._y - y_mean) ** 2)
        ss_res = np.sum(self._resid ** 2)
        return 1 - ss_res / ss_tot if ss_tot > 0 else 0.0

    @property
    def rsquared_adj(self):
        """Adjusted R-squared ``1 - (1 - R2) * (n - 1) / df_resid``.

        Returns ``None`` when the model state is incomplete or when the
        residual degrees of freedom are missing or zero (the ratio is then
        undefined and would otherwise raise or produce inf/nan).
        """
        if self._nobs is None or self._resid is None:
            return None
        r2 = self.rsquared
        if r2 is None:
            return None
        df_resid = self._df_resid
        if df_resid is None or df_resid == 0:
            return None
        return 1 - (1 - r2) * (self._nobs - 1) / df_resid

    @property
    def fvalue(self):
        """Overall F-statistic ``(SS_reg / k) / (SS_res / df_resid)``.

        ``k`` is ``len(self.coef_)``.  Returns ``None`` when the response or
        residuals are not stored, ``np.inf`` when there are no coefficients
        or the residual sum of squares is not positive, and ``None`` when the
        residual degrees of freedom are missing or zero.
        """
        if self._y is None or self._resid is None:
            return None
        y_mean = np.mean(self._y)
        ss_tot = np.sum((self._y - y_mean) ** 2)
        ss_res = np.sum(self._resid ** 2)
        ss_reg = ss_tot - ss_res
        k = len(self.coef_) if self.coef_ is not None else 0
        if k == 0 or ss_res <= 0:
            return np.inf
        df_resid = self._df_resid
        if df_resid is None or df_resid == 0:
            # Mean squared error is undefined without residual df.
            return None
        return (ss_reg / k) / (ss_res / df_resid)

    @property
    def f_pvalue(self):
        """Upper-tail p-value of :attr:`fvalue` under ``F(k, df_resid)``.

        Returns ``1.0`` when :attr:`fvalue` is ``None`` (kept for backward
        compatibility), ``None`` when there are no coefficients, and ``0.0``
        when the F-statistic is ``+inf``.
        """
        fv = self.fvalue
        if fv is None:
            return 1.0
        k = len(self.coef_) if self.coef_ is not None else 0
        if k == 0:
            return None  # No predictors — F-test is undefined
        if np.isposinf(fv):
            return 0.0
        # Use the survival function directly: ``1 - cdf`` cancels to 0.0 for
        # large F, losing all precision in the tail.
        return stats.f.sf(fv, k, self._df_resid)

    @property
    def llf(self):
        """Gaussian log-likelihood evaluated at ``sigma2 = SS_res / n``.

        Returns ``None`` when ``_nobs`` or ``_resid`` is not available.
        """
        if self._nobs is None or self._resid is None:
            return None
        n = self._nobs
        sigma2_mle = np.sum(self._resid ** 2) / n
        return -n / 2 * np.log(2 * np.pi * sigma2_mle) - n / 2

    @property
    def aic(self):
        """Akaike information criterion ``-2 * llf + 2 * len(_params)``.

        Returns ``None`` when ``_nobs`` or ``_scale`` is missing, when
        ``_scale`` contains NaN, or when :attr:`llf` is ``None``.
        """
        if self._nobs is None or self._scale is None:
            return None
        if np.any(np.isnan(np.asarray(self._scale, dtype=float))):
            return None
        _llf = self.llf
        if _llf is None:
            return None
        return -2 * _llf + 2 * len(self._params)

    @property
    def bic(self):
        """Bayesian information criterion ``-2 * llf + len(_params) * log(n)``.

        Returns ``None`` under the same conditions as :attr:`aic`.
        """
        if self._nobs is None or self._scale is None:
            return None
        if np.any(np.isnan(np.asarray(self._scale, dtype=float))):
            return None
        _llf = self.llf
        if _llf is None:
            return None
        n = self._nobs
        k = len(self._params)
        return -2 * _llf + k * np.log(n)

    def summary(self):
        """Print a fixed-width table of fit statistics and coefficients.

        Raises
        ------
        RuntimeError
            If the model is not fitted (``coef_`` is ``None``), if it was
            configured with ``compute_inference=False``, or if standard
            errors (``_bse``) are not available.
        """
        if self.coef_ is None:
            raise RuntimeError("Model has not been fitted yet.")
        if not self.compute_inference:
            raise RuntimeError(
                "compute_inference=False: summary/inference statistics are not available. "
                "Re-fit with compute_inference=True to use summary()."
            )
        if self._bse is None:
            raise RuntimeError("Inference statistics are not available.")

        # Row labels: stored feature names if present, otherwise x1..xp; an
        # "(Intercept)" row is put first when the model has an intercept.
        if self._feature_names is not None:
            feature_names = list(self._feature_names)
            if self._effective_intercept:
                feature_names.insert(0, "(Intercept)")
        elif self._effective_intercept:
            feature_names = ["(Intercept)"] + [f"x{i+1}" for i in range(len(self.coef_))]
        else:
            feature_names = [f"x{i+1}" for i in range(len(self.coef_))]

        penalty_name = str(getattr(self._penalty, "name", self.penalty)).lower()
        inference_method = str(getattr(self, "inference_method", "debiased")).lower()
        is_debiased = penalty_name in ("l1", "elasticnet", "en") and "debiased" in inference_method

        # Title and column labels depend on the penalty / inference method.
        if is_debiased:
            title = "Debiased Lasso Results"
            stat_label = "z"
            pval_label = "P>|z|"
        elif penalty_name == "l2":
            title = "Ridge Regression Results"
            stat_label = "t"
            pval_label = "P>|t|"
        else:
            title = "Penalized Linear Regression Results"
            stat_label = "t"
            pval_label = "P>|t|"

        print("=" * 80)
        print(f"{title:^80}")
        print("=" * 80)

        def _fmt(val, spec):
            # Statistics may legitimately be None; show a right-aligned N/A.
            if val is None:
                return f"{'N/A':>15}"
            return format(val, spec)

        print(f"Alpha: {float(self.alpha):>15.4f}")
        if not is_debiased:
            print(f"Covariance Type: {self.cov_type:>15}")
        print(f"No. Observations: {self._nobs:>15}")
        print(f"Degrees of Freedom: {self._df_resid:>15}")
        print(f"R-squared: {_fmt(self.rsquared, '>15.4f')}")
        print(f"Adj. R-squared: {_fmt(self.rsquared_adj, '>15.4f')}")
        print(f"F-statistic: {_fmt(self.fvalue, '>15.4f')}")
        print(f"Prob (F-statistic): {_fmt(self.f_pvalue, '>15.4e')}")
        print(f"Log-Likelihood: {_fmt(self.llf, '>15.4f')}")
        print(f"AIC: {_fmt(self.aic, '>15.4f')}")
        print(f"BIC: {_fmt(self.bic, '>15.4f')}")
        print("-" * 80)
        print(f"{'':<15} {'coef':>12} {'std err':>12} {stat_label:>10} {pval_label:>10} {'[0.025':>12} {'0.975]':>12}")
        print("-" * 80)

        for i, name in enumerate(feature_names):
            print(f"{name:<15} {self._params[i]:>12.4f} {self._bse[i]:>12.4f} "
                  f"{self._tvalues[i]:>10.3f} {self._pvalues[i]:>10.4f} "
                  f"{self._conf_int[i, 0]:>12.4f} {self._conf_int[i, 1]:>12.4f}")

        # Optional second table with simultaneous confidence intervals.
        if getattr(self, '_simultaneous_enabled', False) and self._conf_int_simultaneous is not None:
            alpha_sim = float(getattr(self, 'simultaneous_alpha',
                                      getattr(self, '_simultaneous_alpha', 0.05)))
            B = int(getattr(self, 'simultaneous_n_bootstrap',
                            getattr(self, '_simultaneous_n_bootstrap', 1000)))
            crit = getattr(self, '_simultaneous_critical_value', None)
            print("-" * 80)
            print("Simultaneous inference (max-|Z| bootstrap)")
            print(f" alpha: {alpha_sim:.6f}")
            print(f" n_bootstrap: {B}")
            if crit is not None:
                print(f" critical value (max|Z|): {crit:.4f}")
            print("-" * 80)
            for i, name in enumerate(feature_names):
                lo = self._conf_int_simultaneous[i, 0]
                hi = self._conf_int_simultaneous[i, 1]
                print(f"{name:<15} {'':>12} {'':>12} {'':>10} {'':>10} {lo:>12.4f} {hi:>12.4f}")

        print("=" * 80)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""PenalizedLogisticRegression — thin wrapper over PenalizedGeneralizedLinearModel."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from statgpu._config import Device
|
|
10
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
11
|
+
from statgpu.linear_model.penalized._predict_mixin import _ETA_CLIP
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PenalizedLogisticRegression(PenalizedGeneralizedLinearModel):
    """Binomial/logistic penalized GLM.

    Fixes ``loss="logistic"`` and forwards all other constructor arguments
    to ``PenalizedGeneralizedLinearModel`` unchanged.  Adds
    :meth:`predict_proba`.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        loss_kwargs: Optional[dict] = None,
    ):
        # Pure pass-through: only the loss name is decided here.
        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
            "loss_kwargs": loss_kwargs,
        }
        super().__init__(loss="logistic", **forwarded)

    def predict_proba(self, X):
        """Return two stacked columns ``[1 - p, p]`` for the inputs ``X``.

        ``p`` is the logistic sigmoid of the linear predictor
        ``X @ coef_ (+ intercept_)``, with the predictor clipped to
        ``[-_ETA_CLIP, _ETA_CLIP]`` before exponentiation.  The computation
        runs with CuPy, PyTorch or NumPy depending on the name returned by
        ``_prediction_backend_name()``, and the result is an array/tensor of
        the matching library.

        Raises
        ------
        RuntimeError
            If ``coef_`` is ``None`` (model not fitted).
        """
        if self.coef_ is None:
            raise RuntimeError("Model has not been fitted yet.")

        X = self._prepare_predict_X(X)
        backend_name = self._prediction_backend_name()

        if backend_name == "cupy":
            import cupy as cp

            features = cp.asarray(self._to_array(X, Device.CUDA))
            weights = cp.asarray(self.coef_)
            eta = features @ weights
            if self._effective_intercept:
                # In-place add keeps the dtype of the linear predictor.
                eta += cp.asarray(self.intercept_, dtype=eta.dtype)
            bounded = cp.clip(eta, -_ETA_CLIP, _ETA_CLIP)
            prob_pos = 1.0 / (1.0 + cp.exp(-bounded))
            return cp.column_stack([1.0 - prob_pos, prob_pos])
        elif backend_name == "torch":
            import torch

            features = self._to_array(X, Device.TORCH, backend="torch").to(torch.float64)
            weights = torch.as_tensor(
                self.coef_, dtype=features.dtype, device=features.device
            )
            eta = features @ weights
            if self._effective_intercept:
                offset = torch.as_tensor(
                    self.intercept_, dtype=eta.dtype, device=eta.device
                )
                eta = eta + offset
            bounded = torch.clamp(eta, -_ETA_CLIP, _ETA_CLIP)
            prob_pos = 1.0 / (1.0 + torch.exp(-bounded))
            return torch.column_stack([1.0 - prob_pos, prob_pos])
        else:
            # Any other backend name falls back to the NumPy path.
            eta = X @ self.coef_
            if self._effective_intercept:
                eta += self.intercept_
            bounded = np.clip(eta, -_ETA_CLIP, _ETA_CLIP)
            prob_pos = 1.0 / (1.0 + np.exp(-bounded))
            return np.column_stack([1.0 - prob_pos, prob_pos])
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Penalized Negative Binomial regression wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from statgpu._config import Device
|
|
7
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PenalizedNegativeBinomialRegression(PenalizedGeneralizedLinearModel):
    """Negative Binomial penalized GLM with configurable dispersion.

    Fixes ``loss="negative_binomial"``.  ``alpha_nb`` is stored under the
    ``"alpha"`` key of the loss options unless ``loss_kwargs`` already
    provides that key, in which case the ``loss_kwargs`` value wins.
    ``loss_kwargs`` is copied first, so the caller's dict is left untouched.
    Note that ``alpha`` (passed to the base class alongside the penalty
    arguments) and ``alpha_nb`` (passed to the loss) are separate arguments.
    All other arguments are forwarded to the base class unchanged.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        alpha_nb: float = 1.0,
        loss_kwargs: Optional[dict] = None,
    ):
        # Private copy; an "alpha" entry given via loss_kwargs wins over alpha_nb.
        nb_loss_options = {} if not loss_kwargs else dict(loss_kwargs)
        if "alpha" not in nb_loss_options:
            nb_loss_options["alpha"] = alpha_nb

        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
        }
        super().__init__(
            loss="negative_binomial",
            loss_kwargs=nb_loss_options,
            **forwarded,
        )
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Penalized Poisson regression wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from statgpu._config import Device
|
|
7
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PenalizedPoissonRegression(PenalizedGeneralizedLinearModel):
    """Poisson penalized GLM.

    Convenience subclass that fixes ``loss="poisson"`` and hands every
    constructor argument to ``PenalizedGeneralizedLinearModel`` unchanged.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        loss_kwargs: Optional[dict] = None,
    ):
        # Pure pass-through: only the loss name is decided here.
        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
            "loss_kwargs": loss_kwargs,
        }
        super().__init__(loss="poisson", **forwarded)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Penalized Tweedie regression wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, Union
|
|
6
|
+
from statgpu._config import Device
|
|
7
|
+
from statgpu.linear_model.penalized._base import PenalizedGeneralizedLinearModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PenalizedTweedieRegression(PenalizedGeneralizedLinearModel):
    """Tweedie penalized GLM with configurable power.

    Fixes ``loss="tweedie"``.  ``power`` is stored under the ``"power"`` key
    of the loss options unless ``loss_kwargs`` already provides that key, in
    which case the ``loss_kwargs`` value wins.  ``loss_kwargs`` is copied
    first, so the caller's dict is left untouched.  All other arguments are
    forwarded to the base class unchanged.
    """

    def __init__(
        self,
        penalty: Union[str, "Penalty"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        penalty_kwargs: Optional[dict] = None,
        fit_intercept: bool = True,
        max_iter: int = 1000,
        tol: float = 1e-4,
        device: Union[str, Device] = Device.AUTO,
        n_jobs: Optional[int] = None,
        cpu_solver: str = "fista",
        solver: str = "auto",
        lipschitz_L: Optional[float] = None,
        gpu_memory_cleanup: bool = False,
        compute_inference: bool = False,
        inference_method: str = "debiased",
        cov_type: str = "nonrobust",
        hac_maxlags: Optional[int] = None,
        stopping: str = "coef_delta",
        lla: bool = True,
        max_lla_iters: int = 50,
        lla_tol: float = 1e-6,
        power: float = 1.5,
        loss_kwargs: Optional[dict] = None,
    ):
        # Private copy; a "power" entry given via loss_kwargs wins over ``power``.
        tweedie_loss_options = {} if not loss_kwargs else dict(loss_kwargs)
        if "power" not in tweedie_loss_options:
            tweedie_loss_options["power"] = power

        forwarded = {
            "penalty": penalty,
            "alpha": alpha,
            "l1_ratio": l1_ratio,
            "penalty_kwargs": penalty_kwargs,
            "fit_intercept": fit_intercept,
            "max_iter": max_iter,
            "tol": tol,
            "device": device,
            "n_jobs": n_jobs,
            "cpu_solver": cpu_solver,
            "solver": solver,
            "lipschitz_L": lipschitz_L,
            "gpu_memory_cleanup": gpu_memory_cleanup,
            "compute_inference": compute_inference,
            "inference_method": inference_method,
            "cov_type": cov_type,
            "hac_maxlags": hac_maxlags,
            "stopping": stopping,
            "lla": lla,
            "max_lla_iters": max_lla_iters,
            "lla_tol": lla_tol,
        }
        super().__init__(
            loss="tweedie",
            loss_kwargs=tweedie_loss_options,
            **forwarded,
        )
|