varguid 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. varguid-0.1.8/.github/dependabot.yml +10 -0
  2. varguid-0.1.8/.github/workflows/ci.yml +76 -0
  3. varguid-0.1.8/.github/workflows/python-publish.yml +70 -0
  4. varguid-0.1.8/.gitignore +26 -0
  5. varguid-0.1.8/.pre-commit-config.yaml +15 -0
  6. varguid-0.1.8/CHANGELOG.md +61 -0
  7. varguid-0.1.8/CITATION.bib +20 -0
  8. varguid-0.1.8/CITATION.cff +40 -0
  9. varguid-0.1.8/CONTRIBUTING.md +53 -0
  10. varguid-0.1.8/FIX_LOG_0.1.8.md +124 -0
  11. varguid-0.1.8/LICENSE +339 -0
  12. varguid-0.1.8/MANIFEST.in +25 -0
  13. varguid-0.1.8/PKG-INFO +266 -0
  14. varguid-0.1.8/PUBLICATION_CHECKLIST.md +59 -0
  15. varguid-0.1.8/PUBLICATION_METADATA_LOG_0.1.8.md +24 -0
  16. varguid-0.1.8/PUBLICATION_VALIDATION_LOG_0.1.8.txt +37 -0
  17. varguid-0.1.8/README.md +215 -0
  18. varguid-0.1.8/RELEASING.md +59 -0
  19. varguid-0.1.8/VALIDATION_LOG_0.1.8.txt +101 -0
  20. varguid-0.1.8/docs/index.md +40 -0
  21. varguid-0.1.8/noxfile.py +30 -0
  22. varguid-0.1.8/pyproject.toml +117 -0
  23. varguid-0.1.8/setup.cfg +4 -0
  24. varguid-0.1.8/src/varguid/__init__.py +17 -0
  25. varguid-0.1.8/src/varguid/data/cobra2d.csv +501 -0
  26. varguid-0.1.8/src/varguid/datasets.py +93 -0
  27. varguid-0.1.8/src/varguid/model.py +879 -0
  28. varguid-0.1.8/src/varguid/py.typed +0 -0
  29. varguid-0.1.8/src/varguid.egg-info/PKG-INFO +266 -0
  30. varguid-0.1.8/src/varguid.egg-info/SOURCES.txt +38 -0
  31. varguid-0.1.8/src/varguid.egg-info/dependency_links.txt +1 -0
  32. varguid-0.1.8/src/varguid.egg-info/requires.txt +22 -0
  33. varguid-0.1.8/src/varguid.egg-info/top_level.txt +1 -0
  34. varguid-0.1.8/tests/benchmarks/README.md +12 -0
  35. varguid-0.1.8/tests/benchmarks/benchmark_statsmodels.py +39 -0
  36. varguid-0.1.8/tests/test_basic.py +179 -0
  37. varguid-0.1.8/tests/test_examples.py +45 -0
  38. varguid-0.1.8/tests/test_r_compatibility.py +93 -0
  39. varguid-0.1.8/tests/test_sklearn.py +9 -0
  40. varguid-0.1.8/tests/test_validation.py +194 -0
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: github-actions
4
+ directory: /
5
+ schedule:
6
+ interval: monthly
7
+ groups:
8
+ actions:
9
+ patterns:
10
+ - "*"
@@ -0,0 +1,76 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ concurrency:
11
+ group: ci-${{ github.workflow }}-${{ github.ref }}
12
+ cancel-in-progress: true
13
+
14
+ jobs:
15
+ tests:
16
+ name: Python ${{ matrix.python-version }}
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: ["3.12", "3.13", "3.14"]
22
+ steps:
23
+ - name: Check out source
24
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
25
+ - name: Set up Python
26
+ uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ cache: pip
30
+ - name: Install package and test dependencies
31
+ run: python -m pip install --upgrade pip && python -m pip install -e ".[dev]"
32
+ - name: Run tests
33
+ run: python -m pytest
34
+
35
+ quality:
36
+ name: Lint and type-check
37
+ runs-on: ubuntu-latest
38
+ steps:
39
+ - name: Check out source
40
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
41
+ - name: Set up Python
42
+ uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
43
+ with:
44
+ python-version: "3.13"
45
+ cache: pip
46
+ - name: Install package and development dependencies
47
+ run: python -m pip install --upgrade pip && python -m pip install -e ".[dev]"
48
+ - name: Ruff
49
+ run: ruff check src tests && ruff format --check src tests
50
+ - name: Mypy
51
+ run: mypy src tests
52
+ - name: Check source manifest
53
+ run: check-manifest
54
+
55
+ build:
56
+ name: Build distributions
57
+ runs-on: ubuntu-latest
58
+ needs: [tests, quality]
59
+ steps:
60
+ - name: Check out source
61
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
62
+ - name: Set up Python
63
+ uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
64
+ with:
65
+ python-version: "3.13"
66
+ cache: pip
67
+ - name: Install build tools
68
+ run: python -m pip install --upgrade pip build twine
69
+ - name: Build and validate
70
+ run: python -m build && python -m twine check dist/*
71
+ - name: Upload distributions
72
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
73
+ with:
74
+ name: python-package-distributions
75
+ path: dist/
76
+ if-no-files-found: error
@@ -0,0 +1,70 @@
1
+ name: Upload Python Package
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ release:
6
+ types: [published]
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ release-build:
13
+ name: Build and validate release distributions
14
+ runs-on: ubuntu-latest
15
+
16
+ steps:
17
+ - name: Check out source
18
+ uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Install build and test tools
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ python -m pip install build twine pytest
29
+ python -m pip install -e ".[dev]"
30
+
31
+ - name: Run tests
32
+ run: python -m pytest -q
33
+
34
+ - name: Build release distributions
35
+ run: python -m build
36
+
37
+ - name: Check release distributions
38
+ run: python -m twine check dist/*
39
+
40
+ - name: Upload distributions
41
+ uses: actions/upload-artifact@v4
42
+ with:
43
+ name: release-dists
44
+ path: dist/
45
+
46
+ pypi-publish:
47
+ name: Publish release distributions to PyPI
48
+ runs-on: ubuntu-latest
49
+ needs:
50
+ - release-build
51
+
52
+ permissions:
53
+ id-token: write
54
+ contents: read
55
+
56
+ environment:
57
+ name: pypi
58
+ url: https://pypi.org/project/varguid/
59
+
60
+ steps:
61
+ - name: Retrieve release distributions
62
+ uses: actions/download-artifact@v4
63
+ with:
64
+ name: release-dists
65
+ path: dist/
66
+
67
+ - name: Publish release distributions to PyPI
68
+ uses: pypa/gh-action-pypi-publish@release/v1
69
+ with:
70
+ packages-dir: dist/
@@ -0,0 +1,26 @@
1
+ # Python bytecode and caches
2
+ __pycache__/
3
+ *.py[cod]
4
+ .pytest_cache/
5
+ .mypy_cache/
6
+ .ruff_cache/
7
+
8
+ # Environments and local tooling
9
+ .venv/
10
+ venv/
11
+ .nox/
12
+
13
+ # Coverage
14
+ .coverage
15
+ .coverage.*
16
+ htmlcov/
17
+
18
+ # Build products and generated metadata
19
+ build/
20
+ dist/
21
+ *.egg-info/
22
+
23
+ # Editors and operating systems
24
+ .DS_Store
25
+ .idea/
26
+ .vscode/
@@ -0,0 +1,15 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.15.18
4
+ hooks:
5
+ - id: ruff-check
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v6.0.0
10
+ hooks:
11
+ - id: end-of-file-fixer
12
+ - id: trailing-whitespace
13
+ - id: check-yaml
14
+ - id: check-toml
15
+ - id: check-added-large-files
@@ -0,0 +1,61 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here.
4
+
5
+ ## 0.1.8 - 2026-06-22
6
+
7
+ ### Fixed
8
+
9
+ - Preserved DataFrame feature identity during prediction by reordering known
10
+ columns and rejecting missing or unexpected columns.
11
+ - Honored no-intercept Patsy formulas such as `y ~ 0 + x1 + x2`.
12
+ - Made formula fitting and prediction reject missing values instead of silently
13
+ dropping affected rows through Patsy.
14
+ - Matched the attached R non-lasso update order after the convergence threshold
15
+ is reached and removed variance-prediction clipping.
16
+ - Added a safe fallback for an exactly zero fitted variance model.
17
+ - Standardized lasso features and added shuffled reproducible CV folds.
18
+ - Added the Scientific Python `rng` interface for functional lasso fits while
19
+ retaining `random_state` as a deprecated compatibility alias.
20
+ - Corrected scikit-learn estimator behavior: mixin order, `NotFittedError`,
21
+ `n_features_in_`, feature-name checks, and fitted attributes.
22
+ - Restored keyword compatibility for `predict(object=...)` and `prd(object=...)`.
23
+ - Corrected the nox build session so Twine receives expanded artifact paths.
24
+ - Preserved the historical `generate_cobra2d(n_features < 10)` behavior by
25
+ expanding the generated design to ten predictors.
26
+
27
+ ### Added
28
+
29
+ - Independent regression tests for the attached R non-lasso algorithm.
30
+ - Full scikit-learn estimator checks.
31
+ - README example, formula, schema-validation, lasso scale-invariance, RNG, and
32
+ exact-fit regression tests.
33
+ - A Trusted Publishing release workflow with least-privilege permissions and
34
+ immutable GitHub Action commit pins.
35
+ - A separate SLSA build-provenance attestation and verification job, plus
36
+ Dependabot tracking for pinned GitHub Actions.
37
+ - A PEP 561 `py.typed` marker and typed-package classifier.
38
+ - Release, contribution, fix-log, and validation documentation.
39
+
40
+ ### Changed
41
+
42
+ - Adopted the publication title `{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python`.
43
+ - Updated canonical repository links to `zionwzz/varguid-python` and added machine-readable and BibTeX citation metadata.
44
+ - Updated a test annotation for compatibility with mypy 2.1.0; runtime behavior is unchanged.
45
+ - Raised the supported Python floor to 3.12 and refreshed core dependency floors
46
+ in line with the project's Scientific Python support policy.
47
+ - CI now targets Python 3.12, 3.13, and 3.14 and separates tests, quality checks,
48
+ distribution building, and publishing.
49
+ - Clean submitted source archives no longer contain generated caches, bytecode,
50
+ egg-info directories, or previously built artifacts. Standard distribution metadata
51
+ generated during an sdist build is retained as required by the build backend.
52
+
53
+ ### Compatibility note
54
+
55
+ Sparse fits use scikit-learn's lasso solver. The implementation now mirrors the
56
+ R workflow more closely through standardization and CV behavior, but it is not
57
+ bit-for-bit equivalent to `glmnet`.
58
+
59
+ ## 0.1.7
60
+
61
+ - Previous Python source release supplied for audit.
@@ -0,0 +1,20 @@
1
+ @software{wang_lu_2026_varguid_python,
2
+ author = {Wang, Zihao and Lu, Min},
3
+ title = {{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python},
4
+ version = {0.1.8},
5
+ year = {2026},
6
+ publisher = {Zenodo},
7
+ doi = {10.5281/zenodo.20816141},
8
+ url = {https://doi.org/10.5281/zenodo.20816141}
9
+ }
10
+
11
+ @article{liu_lu_2026_varguid,
12
+ author = {Liu, Sibei and Lu, Min},
13
+ title = {Variance-Guided Regression for Heteroscedastic Data With a Grouping-Based Extension for Nonlinear Prediction},
14
+ journal = {Statistics in Medicine},
15
+ volume = {45},
16
+ number = {13-14},
17
+ pages = {e70632},
18
+ year = {2026},
19
+ doi = {10.1002/sim.70632}
20
+ }
@@ -0,0 +1,40 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use varguid, please cite both this software and the method paper listed in references."
3
+ type: software
4
+ title: "{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python"
5
+ version: 0.1.8
6
+ doi: "10.5281/zenodo.20816141"
7
+ date-released: "2026-06-23"
8
+ authors:
9
+ - family-names: Wang
10
+ given-names: Zihao
11
+ - family-names: Lu
12
+ given-names: Min
13
+ abstract: >-
14
+ A Python implementation of variance-guided regression for heteroscedastic
15
+ data, including baseline and variance-guided linear prediction, optional
16
+ lasso regularization, formula support, coefficient inference for non-lasso
17
+ fits, and a scikit-learn-compatible estimator.
18
+ repository-code: "https://github.com/zionwzz/varguid-python"
19
+ url: "https://github.com/zionwzz/varguid-python"
20
+ license: GPL-2.0-or-later
21
+ keywords:
22
+ - regression
23
+ - heteroscedasticity
24
+ - weighted least squares
25
+ - mean-variance modeling
26
+ - lasso
27
+ references:
28
+ - type: article
29
+ authors:
30
+ - family-names: Liu
31
+ given-names: Sibei
32
+ - family-names: Lu
33
+ given-names: Min
34
+ title: "Variance-Guided Regression for Heteroscedastic Data With a Grouping-Based Extension for Nonlinear Prediction"
35
+ journal: "Statistics in Medicine"
36
+ volume: 45
37
+ issue: "13-14"
38
+ start: e70632
39
+ year: 2026
40
+ doi: 10.1002/sim.70632
@@ -0,0 +1,53 @@
1
+ # Contributing
2
+
3
+ ## Development environment
4
+
5
+ Create and activate a Python 3.12+ virtual environment, then install the package
6
+ and development dependencies:
7
+
8
+ ```bash
9
+ python -m pip install --upgrade pip
10
+ python -m pip install -e ".[dev]"
11
+ ```
12
+
13
+ ## Required checks
14
+
15
+ Run these commands from the repository root before submitting a change:
16
+
17
+ ```bash
18
+ python -m pytest
19
+ ruff check src tests
20
+ ruff format --check src tests
21
+ mypy src tests
22
+ check-manifest
23
+ python -m build
24
+ python -m twine check dist/*
25
+ ```
26
+
27
+ The same checks are grouped into nox sessions:
28
+
29
+ ```bash
30
+ nox -s tests lint build
31
+ ```
32
+
33
+ Install the local hooks once with `pre-commit install`, then run
34
+ `pre-commit run --all-files` before committing.
35
+
36
+ ## Regression expectations
37
+
38
+ Changes to the fitting algorithm should include tests for:
39
+
40
+ - agreement with the update order in the attached R implementation (`R/irls.R`);
41
+ - DataFrame feature-name safety;
42
+ - formula intercept behavior;
43
+ - lasso reproducibility and scaling behavior, when applicable; and
44
+ - the scikit-learn estimator contract.
45
+
46
+ Sparse fits use scikit-learn rather than R's `glmnet`; document any intentional
47
+ behavioral difference instead of asserting bit-for-bit cross-language parity.
48
+
49
+ ## Releases
50
+
51
+ Update both `pyproject.toml` and `src/varguid/__init__.py`, add an entry to
52
+ `CHANGELOG.md`, and follow `RELEASING.md`. Do not upload from a developer
53
+ machine when the Trusted Publishing workflow is available.
@@ -0,0 +1,124 @@
1
+ # varguid 0.1.8 fix log
2
+
3
+ Audit date: 2026-06-22
4
+
5
+ Inputs reviewed:
6
+
7
+ - Python source archive `varguid_py_0.1.7_source.zip`.
8
+ - R source package `varGuid_0.1.5.tar.gz`, especially `R/irls.R` and `R/add.R`.
9
+
10
+ ## Corrections
11
+
12
+ ### 1. DataFrame prediction schema
13
+
14
+ **Problem:** prediction used positional columns. Reordering the same DataFrame
15
+ columns silently changed predictions.
16
+
17
+ **Correction:** fitted DataFrame names are retained. Prediction reorders a
18
+ complete matching schema and raises a clear error for missing, additional, or
19
+ ambiguous string-equivalent names.
20
+
21
+ **Regression coverage:** `tests/test_validation.py`.
22
+
23
+ ### 2. Formula intercept handling
24
+
25
+ **Problem:** `lmv_formula("y ~ 0 + x1 + x2", ...)` still inserted an intercept.
26
+
27
+ **Correction:** the Patsy design matrix determines `fit_intercept`; no-intercept
28
+ formulas now remain no-intercept during fitting, summaries, and prediction.
29
+ Design information is retained for transformed and categorical predictors.
30
+
31
+ **Regression coverage:** `tests/test_basic.py` and formula prediction tests.
32
+
33
+ Formula fitting and prediction now use Patsy's `NA_action="raise"`, preventing
34
+ missing values from silently reducing the number of fitted or predicted rows.
35
+
36
+ ### 3. Non-lasso R algorithm parity
37
+
38
+ **Problem:** the Python implementation clipped fitted variance values and used a
39
+ different model update order once the coefficient-change threshold was met.
40
+
41
+ **Correction:** variance fits are no longer clipped. Candidate mean models are
42
+ assigned before the convergence branch, matching the attached `R/irls.R`
43
+ control flow. The last committed model remains the returned variance-guided fit.
44
+ An exactly zero variance fit receives a finite uniform-weight fallback.
45
+
46
+ **Regression coverage:** `tests/test_r_compatibility.py` contains an independent
47
+ translation of the attached R non-lasso procedure and exercises the
48
+ post-convergence branch.
49
+
50
+ ### 4. Lasso scaling and randomness
51
+
52
+ **Problem:** predictors were sent to `LassoCV` without glmnet-like
53
+ standardization, and `random_state` had no practical effect under the previous
54
+ solver configuration.
55
+
56
+ **Correction:** mean and variance predictors are standardized using the
57
+ applicable sample weights, coefficients are mapped back to original units, and
58
+ cross-validation uses shuffled `KFold` splits generated from a deterministic
59
+ seed stream. The functional API now accepts the Scientific Python `rng`
60
+ keyword, normalizes it with `numpy.random.default_rng`, and retains
61
+ `random_state` as a deprecated alias. The scikit-learn estimator keeps its
62
+ conventional `random_state` parameter. When `cv_folds=10`, the R default is
63
+ retained: 3 folds for at most 80 rows and 10 folds otherwise.
64
+
65
+ **Limitation:** scikit-learn and `glmnet` use different solvers and lambda-grid
66
+ implementations. Sparse coefficients are reproducible within Python but are not
67
+ claimed to be bit-for-bit identical to R.
68
+
69
+ **Regression coverage:** lasso smoke, reproducibility, and unit-invariance tests.
70
+
71
+ ### 5. Scikit-learn estimator contract
72
+
73
+ **Problem:** the wrapper had the wrong mixin order, raised the wrong pre-fit
74
+ exception, omitted `n_features_in_`, and did not fully validate prediction
75
+ features.
76
+
77
+ **Correction:** `VarGuidRegressor` now inherits `RegressorMixin` before
78
+ `BaseEstimator`, uses scikit-learn validation helpers, records standard fitted
79
+ attributes, and raises `NotFittedError` before fit.
80
+
81
+ **Regression coverage:** the full `sklearn.utils.estimator_checks.check_estimator`
82
+ suite plus focused wrapper tests.
83
+
84
+ ### 6. Packaging, CI, and release security
85
+
86
+ The Patsy lower bound was raised from `1.0` to `1.0.1` because Patsy 1.0.0 is a
87
+ yanked release.
88
+
89
+ **Problem:** the supplied archive included old `dist/`, cache folders, bytecode,
90
+ and egg-info; configured lint/type checks failed; release instructions relied on
91
+ manual token-based uploads.
92
+
93
+ **Correction:** lint and type issues were resolved, the nox build glob was
94
+ corrected, source packaging was cleaned, and CI now tests supported Python
95
+ versions. Release publishing uses a separate least-privilege OIDC job through a
96
+ protected `pypi` environment. Third-party actions are pinned to full immutable
97
+ commit SHAs and tracked by Dependabot. A separate credential-limited job
98
+ generates and verifies SLSA build-provenance attestations before the PyPI
99
+ publishing job can run. The wheel includes a PEP 561 `py.typed` marker.
100
+
101
+ ### 7. Synthetic-data compatibility
102
+
103
+ **Problem:** an early validation change rejected `n_features < 10`, whereas the
104
+ 0.1.7 helper and attached R generator expand such requests to ten predictors.
105
+
106
+ **Correction:** positive feature counts below ten are again expanded to ten.
107
+ Input validation, the preferred `rng` interface, and valid negative
108
+ equicorrelation support are retained.
109
+
110
+ **Regression coverage:** `tests/test_validation.py`.
111
+
112
+ ## Validation scope
113
+
114
+ The final release was validated with pytest, Ruff lint and format checks, mypy,
115
+ package build, Twine metadata checks, archive-content inspection, wheel
116
+ installation, source-distribution installation, and installed-package execution
117
+ of the documented examples. Exact commands and outputs are recorded in
118
+ `VALIDATION_LOG_0.1.8.txt`.
119
+
120
+ The available local interpreter was CPython 3.13.5. Python 3.12 and 3.14 are
121
+ configured in CI but were not available for local execution in this audit.
122
+ An R interpreter was not available locally; non-lasso parity was checked against
123
+ an independent Python translation of the attached R source rather than by
124
+ executing R itself.