PyPI - statgpu - Versions diffs - 0.1.0__py3-none-any.whl - Mend

statgpu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (168) hide show

statgpu/__init__.py +174 -0
statgpu/_base.py +544 -0
statgpu/_config.py +127 -0
statgpu/anova/__init__.py +5 -0
statgpu/anova/_oneway.py +194 -0
statgpu/backends/__init__.py +83 -0
statgpu/backends/_array_ops.py +529 -0
statgpu/backends/_base.py +184 -0
statgpu/backends/_cupy.py +453 -0
statgpu/backends/_factory.py +65 -0
statgpu/backends/_gpu_inference_cupy.py +214 -0
statgpu/backends/_gpu_inference_torch.py +422 -0
statgpu/backends/_numpy.py +324 -0
statgpu/backends/_torch.py +685 -0
statgpu/backends/_torch_safe.py +47 -0
statgpu/backends/_utils.py +423 -0
statgpu/core/__init__.py +10 -0
statgpu/core/formula/__init__.py +33 -0
statgpu/core/formula/_design.py +99 -0
statgpu/core/formula/_parser.py +191 -0
statgpu/core/formula/_terms.py +70 -0
statgpu/core/formula/tests/__init__.py +0 -0
statgpu/core/formula/tests/test_parser.py +194 -0
statgpu/covariance/__init__.py +6 -0
statgpu/covariance/_empirical.py +310 -0
statgpu/covariance/_shrinkage.py +248 -0
statgpu/cross_validation/__init__.py +31 -0
statgpu/cross_validation/_base.py +410 -0
statgpu/cross_validation/_engine.py +167 -0
statgpu/diagnostics/__init__.py +7 -0
statgpu/diagnostics/_regression_diagnostics.py +188 -0
statgpu/feature_selection/__init__.py +24 -0
statgpu/feature_selection/_knockoff.py +870 -0
statgpu/feature_selection/_knockoff_utils.py +1003 -0
statgpu/feature_selection/_stepwise.py +300 -0
statgpu/glm_core/__init__.py +81 -0
statgpu/glm_core/_base.py +202 -0
statgpu/glm_core/_family.py +362 -0
statgpu/glm_core/_fused.py +149 -0
statgpu/glm_core/_gamma.py +111 -0
statgpu/glm_core/_inverse_gaussian.py +62 -0
statgpu/glm_core/_irls.py +561 -0
statgpu/glm_core/_logistic.py +82 -0
statgpu/glm_core/_negative_binomial.py +68 -0
statgpu/glm_core/_poisson.py +60 -0
statgpu/glm_core/_solver_legacy.py +100 -0
statgpu/glm_core/_squared.py +53 -0
statgpu/glm_core/_tweedie.py +74 -0
statgpu/inference/__init__.py +239 -0
statgpu/inference/_distributions_backend.py +2610 -0
statgpu/inference/_multiple_testing.py +391 -0
statgpu/inference/_resampling.py +1400 -0
statgpu/inference/_results.py +265 -0
statgpu/linear_model/__init__.py +75 -0
statgpu/linear_model/_gaussian_inference.py +306 -0
statgpu/linear_model/_glm_base.py +1261 -0
statgpu/linear_model/_ordered_logit.py +52 -0
statgpu/linear_model/_ordered_probit.py +50 -0
statgpu/linear_model/_stats.py +170 -0
statgpu/linear_model/cv/__init__.py +13 -0
statgpu/linear_model/cv/_elasticnet_cv.py +892 -0
statgpu/linear_model/cv/_lasso_cv.py +253 -0
statgpu/linear_model/cv/_logistic_cv.py +895 -0
statgpu/linear_model/cv/_ridge_cv.py +1160 -0
statgpu/linear_model/legacy/__init__.py +1 -0
statgpu/linear_model/legacy/_distributions_legacy_gpu.py +340 -0
statgpu/linear_model/legacy/_elasticnet_legacy.py +936 -0
statgpu/linear_model/legacy/_lasso_legacy.py +4876 -0
statgpu/linear_model/legacy/_penalized_legacy.py +1174 -0
statgpu/linear_model/legacy/_ridge_legacy.py +863 -0
statgpu/linear_model/legacy/_solver_legacy.py +104 -0
statgpu/linear_model/penalized/__init__.py +25 -0
statgpu/linear_model/penalized/_base.py +437 -0
statgpu/linear_model/penalized/_fit_mixin.py +1877 -0
statgpu/linear_model/penalized/_inference_mixin.py +1179 -0
statgpu/linear_model/penalized/_penalized_cv.py +2699 -0
statgpu/linear_model/penalized/_penalized_gamma.py +86 -0
statgpu/linear_model/penalized/_penalized_inverse_gaussian.py +62 -0
statgpu/linear_model/penalized/_penalized_linear.py +236 -0
statgpu/linear_model/penalized/_penalized_logistic.py +100 -0
statgpu/linear_model/penalized/_penalized_negative_binomial.py +65 -0
statgpu/linear_model/penalized/_penalized_poisson.py +62 -0
statgpu/linear_model/penalized/_penalized_tweedie.py +65 -0
statgpu/linear_model/penalized/_predict_mixin.py +182 -0
statgpu/linear_model/wrappers/__init__.py +31 -0
statgpu/linear_model/wrappers/_adaptive_lasso.py +63 -0
statgpu/linear_model/wrappers/_elasticnet.py +75 -0
statgpu/linear_model/wrappers/_gamma.py +67 -0
statgpu/linear_model/wrappers/_inverse_gaussian.py +47 -0
statgpu/linear_model/wrappers/_lasso.py +2124 -0
statgpu/linear_model/wrappers/_linear.py +1127 -0
statgpu/linear_model/wrappers/_logistic.py +1435 -0
statgpu/linear_model/wrappers/_mcp.py +58 -0
statgpu/linear_model/wrappers/_negative_binomial.py +58 -0
statgpu/linear_model/wrappers/_poisson.py +48 -0
statgpu/linear_model/wrappers/_ridge.py +166 -0
statgpu/linear_model/wrappers/_scad.py +58 -0
statgpu/linear_model/wrappers/_tweedie.py +57 -0
statgpu/metrics/__init__.py +21 -0
statgpu/metrics/_classification.py +591 -0
statgpu/nonparametric/__init__.py +50 -0
statgpu/nonparametric/kernel_methods/__init__.py +25 -0
statgpu/nonparametric/kernel_methods/_kernels.py +246 -0
statgpu/nonparametric/kernel_methods/_krr.py +234 -0
statgpu/nonparametric/kernel_methods/_krr_cv.py +380 -0
statgpu/nonparametric/kernel_smoothing/__init__.py +39 -0
statgpu/nonparametric/kernel_smoothing/_bandwidth_selection.py +1083 -0
statgpu/nonparametric/kernel_smoothing/_kde.py +761 -0
statgpu/nonparametric/kernel_smoothing/_kernel_common.py +348 -0
statgpu/nonparametric/kernel_smoothing/_kernel_regression.py +748 -0
statgpu/nonparametric/splines/__init__.py +5 -0
statgpu/nonparametric/splines/_bspline_basis.py +336 -0
statgpu/nonparametric/splines/_penalized.py +349 -0
statgpu/panel/__init__.py +19 -0
statgpu/panel/_covariance.py +140 -0
statgpu/panel/_fixed_effects.py +420 -0
statgpu/panel/_random_effects.py +385 -0
statgpu/panel/_utils.py +482 -0
statgpu/penalties/__init__.py +139 -0
statgpu/penalties/_adaptive_l1.py +313 -0
statgpu/penalties/_base.py +261 -0
statgpu/penalties/_categories.py +39 -0
statgpu/penalties/_elasticnet.py +98 -0
statgpu/penalties/_group_lasso.py +678 -0
statgpu/penalties/_group_mcp.py +553 -0
statgpu/penalties/_group_scad.py +605 -0
statgpu/penalties/_l1.py +107 -0
statgpu/penalties/_l2.py +77 -0
statgpu/penalties/_mcp.py +237 -0
statgpu/penalties/_scad.py +260 -0
statgpu/semiparametric/__init__.py +5 -0
statgpu/semiparametric/_gam.py +401 -0
statgpu/solvers/__init__.py +24 -0
statgpu/solvers/_admm.py +241 -0
statgpu/solvers/_constants.py +15 -0
statgpu/solvers/_convergence.py +6 -0
statgpu/solvers/_fista.py +436 -0
statgpu/solvers/_fista_bb.py +513 -0
statgpu/solvers/_fista_lla.py +541 -0
statgpu/solvers/_lbfgs.py +206 -0
statgpu/solvers/_newton.py +149 -0
statgpu/solvers/_utils.py +277 -0
statgpu/survival/__init__.py +14 -0
statgpu/survival/_cox.py +3974 -0
statgpu/survival/_cox_breslow_triton_kernel.py +106 -0
statgpu/survival/_cox_cv.py +1159 -0
statgpu/survival/_cox_efron_cuda.py +1280 -0
statgpu/survival/_cox_efron_triton.py +359 -0
statgpu/unsupervised/__init__.py +29 -0
statgpu/unsupervised/_agglomerative.py +307 -0
statgpu/unsupervised/_dbscan.py +263 -0
statgpu/unsupervised/_dbscan_cpu.pyx +125 -0
statgpu/unsupervised/_gmm.py +332 -0
statgpu/unsupervised/_incremental_pca.py +176 -0
statgpu/unsupervised/_kmeans.py +261 -0
statgpu/unsupervised/_minibatch_kmeans.py +299 -0
statgpu/unsupervised/_minibatch_nmf.py +252 -0
statgpu/unsupervised/_nmf.py +190 -0
statgpu/unsupervised/_pca.py +189 -0
statgpu/unsupervised/_truncated_svd.py +132 -0
statgpu/unsupervised/_tsne.py +192 -0
statgpu/unsupervised/_umap.py +224 -0
statgpu/unsupervised/_utils.py +134 -0
statgpu-0.1.0.dist-info/METADATA +245 -0
statgpu-0.1.0.dist-info/RECORD +168 -0
statgpu-0.1.0.dist-info/WHEEL +5 -0
statgpu-0.1.0.dist-info/licenses/LICENSE +199 -0
statgpu-0.1.0.dist-info/top_level.txt +1 -0

statgpu/panel/_covariance.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""
+Clustered covariance estimators for panel data models.
+Implements one-way and two-way clustered standard errors following
+Cameron & Miller (2015) and Cameron, Gelbach & Miller (2011).
+"""
+from __future__ import annotations
+__all__ = ["clustered_covariance", "two_way_clustered_covariance"]
+from typing import Optional
+import numpy as np
+from statgpu.backends import (
+    _LINALG_ERRORS,
+    _get_torch_device_str,
+    _torch_dev,
+    _to_numpy,
+    xp_asarray,
+    xp_zeros,
+)
+def _ensure_xp(xp=None):
+    """Return the array module, defaulting to numpy."""
+    return xp if xp is not None else np
+def clustered_covariance(X, resid, clusters, xp=None):
+    """One-way clustered robust covariance matrix.
+    Implements the cluster-robust sandwich estimator:
+        V = (X'X/n)^{-1} @ meat @ (X'X/n)^{-1}
+    where ``meat = sum_g (X_g' e_g)(X_g' e_g)'`` summed over clusters.
+    Parameters
+    ----------
+    X : array-like, shape (n, k)
+        Design matrix (including intercept if applicable).
+    resid : array-like, shape (n,)
+        OLS residuals.
+    clusters : array-like, shape (n,)
+        Cluster assignment labels (integer or categorical).
+    xp : module, optional
+        Array module (numpy / cupy / torch).  Defaults to numpy.
+    Returns
+    -------
+    V : array, shape (k, k)
+        Cluster-robust covariance matrix of the coefficient estimates.
+    """
+    xp = _ensure_xp(xp)
+    X = xp_asarray(X, dtype=xp.float64, xp=xp)
+    resid = xp_asarray(resid, dtype=xp.float64, xp=xp, ref_arr=X).ravel()
+    clusters = xp_asarray(clusters, xp=xp, ref_arr=X).ravel()
+    n, k = X.shape
+    # Bread: (X'X / n)^{-1}
+    XtX = X.T @ X / n
+    try:
+        bread = xp.linalg.inv(XtX)
+    except _LINALG_ERRORS:
+        bread = xp.linalg.pinv(XtX)
+    # Meat: sum over clusters of (X_g' e_g)(X_g' e_g)'
+    # Batch-transfer unique cluster values to CPU (single sync, not per-cluster)
+    unique_clusters_cpu = _to_numpy(xp.unique(clusters)).tolist()
+    meat = xp_zeros((k, k), xp.float64, xp, X)
+    for g_val in unique_clusters_cpu:
+        mask = clusters == g_val
+        Xg = X[mask]
+        eg = resid[mask]
+        Xe = Xg.T @ eg  # shape (k,)
+        meat = meat + xp.outer(Xe, Xe)
+    # Sandwich: V = bread @ meat @ bread / n^2
+    V = bread @ meat @ bread / (n * n)
+    return V
+def two_way_clustered_covariance(X, resid, cluster1, cluster2, xp=None):
+    """Two-way clustered robust covariance matrix.
+    Implements the Cameron, Gelbach & Miller (2011) intersection
+    correction::
+        V = V_cluster1 + V_cluster2 - V_intersection
+    where the intersection clusters are formed from all unique
+    ``(cluster1, cluster2)`` pairs.
+    Parameters
+    ----------
+    X : array-like, shape (n, k)
+        Design matrix.
+    resid : array-like, shape (n,)
+        OLS residuals.
+    cluster1 : array-like, shape (n,)
+        First cluster dimension (e.g. entity).  Accepts integer or
+        categorical labels (will be factorized to integers internally).
+    cluster2 : array-like, shape (n,)
+        Second cluster dimension (e.g. time).  Same as cluster1.
+    xp : module, optional
+        Array module (numpy / cupy / torch).  Defaults to numpy.
+    Returns
+    -------
+    V : array, shape (k, k)
+        Two-way cluster-robust covariance matrix.
+    """
+    xp = _ensure_xp(xp)
+    V1 = clustered_covariance(X, resid, cluster1, xp)
+    V2 = clustered_covariance(X, resid, cluster2, xp)
+    # Intersection clusters: unique (c1, c2) pairs via Cantor-pair hash
+    # Factorize labels to integers (supports string/categorical labels)
+    c1_raw = _to_numpy(xp_asarray(cluster1, xp=xp, ref_arr=V1).ravel())
+    c2_raw = _to_numpy(xp_asarray(cluster2, xp=xp, ref_arr=V1).ravel())
+    _, c1 = np.unique(c1_raw, return_inverse=True)
+    _, c2 = np.unique(c2_raw, return_inverse=True)
+    # Use Python int for Cantor-pair to avoid int64 overflow with
+    # large cluster counts (>~3 billion unique combinations).
+    c1_int = [int(x) for x in c1]
+    c2_int = [int(x) for x in c2]
+    combined_np = np.array(
+        [s * (s + 1) // 2 + c2i for s, c2i in zip(
+            [a + b for a, b in zip(c1_int, c2_int)], c2_int
+        )],
+        dtype=np.int64,
+    )
+    combined = xp_asarray(combined_np, dtype=xp.int64, xp=xp, ref_arr=V1)
+    V12 = clustered_covariance(X, resid, combined, xp)
+    return V1 + V2 - V12

statgpu/panel/_fixed_effects.py ADDED Viewed

@@ -0,0 +1,420 @@
+"""
+Fixed effects panel data model (PanelOLS).
+Implements one-way and two-way fixed effects estimation with support
+for non-robust, HC1 robust, and clustered standard errors.  GPU
+acceleration is provided transparently via the statgpu backend system.
+"""
+from __future__ import annotations
+__all__ = ["PanelOLS"]
+from typing import Optional, Union
+import numpy as np
+from scipy import stats
+from statgpu._base import BaseEstimator
+from statgpu._config import Device
+from statgpu.backends import _LINALG_ERRORS, _get_torch_device_str, _torch_dev, _to_float_scalar, _to_numpy, xp_astype, xp_cholesky_solve
+from statgpu.panel._utils import PanelSummary, _scatter_add, demean_variables
+from statgpu.panel._covariance import clustered_covariance, two_way_clustered_covariance
+class PanelOLS(BaseEstimator):
+    """Fixed effects estimator for panel data.
+    Supports entity (individual) fixed effects, time fixed effects,
+    and two-way fixed effects via the within transformation.
+    Parameters
+    ----------
+    entity_effects : bool, default=False
+        Include entity (individual) fixed effects.
+    time_effects : bool, default=False
+        Include time fixed effects.
+    cov_type : str, default='nonrobust'
+        Covariance estimator: ``'nonrobust'``, ``'robust'`` (HC1), or
+        ``'clustered'``.
+    device : str or Device, default='auto'
+        Computation device.
+    Attributes
+    ----------
+    coef_ : ndarray, shape (k,)
+        Estimated slope coefficients.
+    bse_ : ndarray, shape (k,)
+        Standard errors.
+    tvalues_ : ndarray, shape (k,)
+        t-statistics.
+    pvalues_ : ndarray, shape (k,)
+        Two-sided p-values.
+    conf_int_ : ndarray, shape (k, 2)
+        95 % confidence intervals.
+    rsquared_within : float
+        Within R-squared (variance explained by regressors after demeaning).
+    nobs : int
+        Number of observations used in estimation.
+    df_resid : int
+        Residual degrees of freedom.
+    """
+    def __init__(
+        self,
+        entity_effects: bool = False,
+        time_effects: bool = False,
+        cov_type: str = 'nonrobust',
+        alpha: float = 0.05,
+        device: Union[str, Device] = Device.AUTO,
+        n_jobs: Optional[int] = None,
+    ):
+        super().__init__(device=device, n_jobs=n_jobs)
+        self.entity_effects = entity_effects
+        self.time_effects = time_effects
+        self.cov_type = cov_type.lower()
+        self.alpha = alpha
+        if self.cov_type not in ('nonrobust', 'robust', 'clustered'):
+            raise ValueError(
+                "cov_type must be 'nonrobust', 'robust', or 'clustered'"
+            )
+        # Public attributes set by fit()
+        self.coef_ = None
+        self.bse_ = None
+        self.tvalues_ = None
+        self.pvalues_ = None
+        self.conf_int_ = None
+        self.rsquared_within = None
+        self.nobs = None
+        self.df_resid = None
+        # Internal storage
+        self._params = None
+        self._scale = None
+        self._entity_effects_map = {}
+        self._time_effects_map = {}
+    def fit(self, X, y, entity_ids=None, time_ids=None, cluster=None):
+        """Fit the fixed effects model.
+        Parameters
+        ----------
+        X : array-like, shape (n, k)
+            Regressor matrix. Include a constant column if you want an
+            intercept (the model does not add one automatically).
+        y : array-like, shape (n,)
+            Outcome vector.
+        entity_ids : array-like, shape (n,), optional
+            Entity (individual) identifiers.  Required when
+            ``entity_effects=True``.
+        time_ids : array-like, shape (n,), optional
+            Time-period identifiers.  Required when ``time_effects=True``.
+        cluster : array-like, shape (n,), optional
+            Cluster labels for clustered standard errors.  Required when
+            ``cov_type='clustered'``.
+        Returns
+        -------
+        self
+        """
+        # Resolve backend
+        backend = self._get_backend(backend='auto')
+        backend_name = backend.name
+        xp = backend.xp
+        # Convert inputs to backend arrays
+        y_arr = xp_astype(self._to_array(y, backend=backend_name).ravel(), xp.float64, xp)
+        X_arr = xp_astype(self._to_array(X, backend=backend_name), xp.float64, xp)
+        if X_arr.ndim == 1:
+            X_arr = X_arr.reshape(-1, 1)
+        n, k = X_arr.shape
+        self.nobs = n
+        # Validate shapes
+        if y_arr.shape[0] != n:
+            raise ValueError(
+                f"y has {y_arr.shape[0]} observations but X has {n} rows"
+            )
+        # Validate
+        if self.entity_effects and entity_ids is None:
+            raise ValueError("entity_ids is required when entity_effects=True")
+        if self.time_effects and time_ids is None:
+            raise ValueError("time_ids is required when time_effects=True")
+        if self.cov_type == 'clustered' and cluster is None:
+            raise ValueError("cluster is required when cov_type='clustered'")
+        entity_arr = None
+        time_arr = None
+        if entity_ids is not None:
+            entity_arr = self._to_array(entity_ids, backend=backend_name).ravel()
+        if time_ids is not None:
+            time_arr = self._to_array(time_ids, backend=backend_name).ravel()
+        # Demean if fixed effects requested
+        if self.entity_effects or self.time_effects:
+            y_d, X_d = demean_variables(
+                y_arr, X_arr,
+                entity_ids=entity_arr if self.entity_effects else None,
+                time_ids=time_arr if self.time_effects else None,
+                xp=xp,
+            )
+        else:
+            y_d = y_arr
+            X_d = X_arr
+        # OLS on demeaned data: beta = (X'X)^{-1} X'y
+        XtX = X_d.T @ X_d
+        Xty = X_d.T @ y_d
+        try:
+            coef = xp_cholesky_solve(XtX, Xty, xp)
+        except _LINALG_ERRORS:
+            coef = xp.linalg.solve(XtX, Xty)
+        # Degrees of freedom
+        n_entities = len(xp.unique(entity_arr)) if entity_arr is not None else 0
+        n_times = len(xp.unique(time_arr)) if time_arr is not None else 0
+        n_effects = 0
+        if self.entity_effects:
+            n_effects += n_entities - 1
+        if self.time_effects:
+            n_effects += n_times - 1
+        self.df_resid = n - k - n_effects
+        if self.df_resid <= 0:
+            raise ValueError(
+                f"Not enough observations: n={n}, k={k}, n_effects={n_effects}, "
+                f"df_resid={self.df_resid}.  Check that N*T >> k + effects."
+            )
+        # Residuals and scale (on the demeaned data, all on device)
+        y_pred = X_d @ coef
+        resid = y_d - y_pred
+        scale = _to_float_scalar(xp.sum(resid ** 2)) / self.df_resid
+        self._scale = scale
+        # Compute entity/time effects for predict()
+        # Subtract grand mean to avoid double-counting in two-way FE
+        self._entity_effects_map = {}
+        self._time_effects_map = {}
+        resid_orig = y_arr - X_arr @ coef
+        grand_mean = float(xp.mean(resid_orig))
+        resid_centered = resid_orig - grand_mean
+        self._grand_mean = grand_mean
+        if self.entity_effects and entity_arr is not None:
+            ent_np = _to_numpy(entity_arr).ravel()
+            unique_ent, idx_np = np.unique(ent_np, return_inverse=True)
+            idx_dev = xp.asarray(idx_np, dtype=xp.int64)
+            ent_sums = _scatter_add(xp, idx_dev, resid_centered, len(unique_ent))
+            ent_counts = _scatter_add(xp, idx_dev, xp.ones_like(resid_centered), len(unique_ent))
+            ent_effects = _to_numpy(ent_sums / xp.maximum(ent_counts, 1.0)).ravel()
+            for i, eid in enumerate(unique_ent):
+                self._entity_effects_map[eid] = float(ent_effects[i])
+        if self.time_effects and time_arr is not None:
+            time_np = _to_numpy(time_arr).ravel()
+            unique_time, idx_np = np.unique(time_np, return_inverse=True)
+            idx_dev = xp.asarray(idx_np, dtype=xp.int64)
+            time_sums = _scatter_add(xp, idx_dev, resid_centered, len(unique_time))
+            time_counts = _scatter_add(xp, idx_dev, xp.ones_like(resid_centered), len(unique_time))
+            time_effects = _to_numpy(time_sums / xp.maximum(time_counts, 1.0)).ravel()
+            for i, tid in enumerate(unique_time):
+                self._time_effects_map[tid] = float(time_effects[i])
+        # Keep arrays on device for inference — only transfer final results
+        self._compute_inference(xp, cluster, backend_name,
+                                X_d, coef, resid, y_d)
+        # Single batch transfer of final results to CPU
+        self._params = _to_numpy(coef).ravel()
+        self.coef_ = self._params
+        self._fitted = True
+        return self
+    def _compute_inference(self, xp, cluster, backend_name,
+                           X_d, coef, resid, y_d):
+        """Compute SE, t-values, p-values, and CIs — all on device.
+        Uses statgpu's backend-agnostic inference framework for p-values,
+        so no GPU→CPU transfer is needed for the computation.  Only the
+        final numpy result vectors are stored for the user API.
+        """
+        from statgpu.inference._distributions_backend import get_distribution
+        n, k = X_d.shape
+        df = self.df_resid
+        alpha = self.alpha
+        # XtX and its inverse — on device
+        XtX = X_d.T @ X_d
+        try:
+            XtX_inv = xp.linalg.inv(XtX)
+        except _LINALG_ERRORS:
+            XtX_inv = xp.linalg.pinv(XtX)
+        if self.cov_type == 'nonrobust':
+            cov_params = self._scale * XtX_inv
+            bse_dev = xp.sqrt(xp.maximum(xp.diag(cov_params), 0.0))
+        elif self.cov_type == 'robust':
+            # HC1 sandwich — on device
+            # Use df_resid (not n-k) to account for absorbed fixed effects
+            e2 = resid ** 2
+            Xw = X_d * e2[:, None]
+            meat = X_d.T @ Xw
+            cov_params = XtX_inv @ meat @ XtX_inv
+            if self.df_resid > 0:
+                cov_params = cov_params * (n / self.df_resid)
+            bse_dev = xp.sqrt(xp.maximum(xp.diag(cov_params), 0.0))
+        else:  # clustered
+            cluster_np = _to_numpy(cluster)
+            # Validate cluster length matches fitted data
+            if len(cluster_np) != X_d.shape[0]:
+                raise ValueError(
+                    f"cluster length ({len(cluster_np)}) does not match "
+                    f"data length ({X_d.shape[0]})"
+                )
+            if cluster_np.ndim == 2 and cluster_np.shape[1] == 2:
+                V = two_way_clustered_covariance(
+                    X_d, resid, cluster_np[:, 0], cluster_np[:, 1], xp=xp
+                )
+            else:
+                V = clustered_covariance(X_d, resid, cluster_np, xp=xp)
+            bse_dev = xp.sqrt(xp.maximum(xp.diag(V), 0.0))
+        # t-values — on device
+        _eps = xp.finfo(xp.float64).tiny if hasattr(xp, 'finfo') else 2.2e-308
+        tvalues_dev = coef / xp.maximum(bse_dev, _eps)
+        abs_t = xp.abs(tvalues_dev)
+        # p-values via backend-agnostic inference framework — on device
+        if self.cov_type in ('nonrobust',):
+            t_dist = get_distribution("t", backend=backend_name)
+            pvalues_dev = 2.0 * t_dist.sf(abs_t, float(df))
+            t_crit = float(t_dist.isf(xp.asarray([alpha / 2.0]), float(df))[0])
+        else:
+            norm_dist = get_distribution("norm", backend=backend_name)
+            pvalues_dev = 2.0 * norm_dist.sf(abs_t)
+            t_crit = float(norm_dist.isf(xp.asarray([alpha / 2.0]))[0])
+        # Final transfer: only k-length vectors to CPU for storage
+        self.bse_ = _to_numpy(bse_dev).ravel()
+        self.tvalues_ = _to_numpy(tvalues_dev).ravel()
+        self.pvalues_ = _to_numpy(pvalues_dev).ravel()
+        coef_np = _to_numpy(coef).ravel()
+        self.conf_int_ = np.column_stack([
+            coef_np - t_crit * self.bse_,
+            coef_np + t_crit * self.bse_,
+        ])
+        # Within R-squared — on device, single sync
+        ss_res = _to_float_scalar(xp.sum(resid ** 2))
+        y_d_mean = _to_float_scalar(xp.mean(y_d))
+        ss_tot = _to_float_scalar(xp.sum((y_d - y_d_mean) ** 2))
+        self.rsquared_within = 1 - ss_res / ss_tot if ss_tot > 0 else 0.0
+    def predict(self, X, entity_ids=None, time_ids=None):
+        """Predict using the fitted model.
+        If the model was fitted with entity/time effects and the
+        corresponding identifiers are provided, the predictions include
+        the estimated fixed effects.
+        Parameters
+        ----------
+        X : array-like, shape (n, k)
+            Regressor matrix.
+        entity_ids : array-like, shape (n,), optional
+            Entity identifiers.  Required to include entity effects in
+            the prediction.
+        time_ids : array-like, shape (n,), optional
+            Time-period identifiers.  Required to include time effects
+            in the prediction.
+        Returns
+        -------
+        y_pred : ndarray, shape (n,)
+            Predicted values.
+        """
+        self._check_is_fitted()
+        X_arr = np.asarray(X, dtype=np.float64)
+        if X_arr.ndim == 1:
+            X_arr = X_arr.reshape(-1, 1)
+        y_pred = X_arr @ self.coef_
+        # Add entity effects via vectorized lookup
+        if self._entity_effects_map and entity_ids is not None:
+            ent_arr = np.asarray(entity_ids).ravel()
+            ent_effects = np.vectorize(
+                self._entity_effects_map.get, otypes=[np.float64]
+            )(ent_arr, 0.0)
+            y_pred = y_pred + ent_effects
+        # Add time effects via vectorized lookup
+        if self._time_effects_map and time_ids is not None:
+            time_arr = np.asarray(time_ids).ravel()
+            time_effects = np.vectorize(
+                self._time_effects_map.get, otypes=[np.float64]
+            )(time_arr, 0.0)
+            y_pred = y_pred + time_effects
+        return y_pred
+    def summary(self):
+        """Print and return a structured coefficient summary.
+        Returns
+        -------
+        PanelSummary
+            Dataclass with all model results.  Also prints a formatted
+            table to stdout for interactive use.
+        """
+        self._check_is_fitted()
+        k = len(self._params)
+        feat_names = [f'x{i+1}' for i in range(k)]
+        s = PanelSummary(
+            model_type='PanelOLS',
+            nobs=self.nobs,
+            df_resid=self.df_resid,
+            coef=self._params,
+            bse=self.bse_,
+            tvalues=self.tvalues_,
+            pvalues=self.pvalues_,
+            conf_int=self.conf_int_,
+            feature_names=feat_names,
+            rsquared_within=self.rsquared_within,
+            cov_type=self.cov_type,
+            entity_effects=self.entity_effects,
+            time_effects=self.time_effects,
+            alpha=self.alpha,
+        )
+        print(s)
+        return s
+    def get_params(self, deep=True):
+        """Get parameters for this estimator."""
+        params = super().get_params(deep)
+        params.update({
+            'entity_effects': self.entity_effects,
+            'time_effects': self.time_effects,
+            'cov_type': self.cov_type,
+            'alpha': self.alpha,
+        })
+        return params
+    def set_params(self, **params):
+        """Set parameters for this estimator."""
+        for key in ('entity_effects', 'time_effects', 'cov_type', 'alpha'):
+            if key in params:
+                setattr(self, key, params.pop(key))
+        super().set_params(**params)
+        return self