varguid 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- varguid-0.1.8/.github/dependabot.yml +10 -0
- varguid-0.1.8/.github/workflows/ci.yml +76 -0
- varguid-0.1.8/.github/workflows/python-publish.yml +70 -0
- varguid-0.1.8/.gitignore +26 -0
- varguid-0.1.8/.pre-commit-config.yaml +15 -0
- varguid-0.1.8/CHANGELOG.md +61 -0
- varguid-0.1.8/CITATION.bib +20 -0
- varguid-0.1.8/CITATION.cff +40 -0
- varguid-0.1.8/CONTRIBUTING.md +53 -0
- varguid-0.1.8/FIX_LOG_0.1.8.md +124 -0
- varguid-0.1.8/LICENSE +339 -0
- varguid-0.1.8/MANIFEST.in +25 -0
- varguid-0.1.8/PKG-INFO +266 -0
- varguid-0.1.8/PUBLICATION_CHECKLIST.md +59 -0
- varguid-0.1.8/PUBLICATION_METADATA_LOG_0.1.8.md +24 -0
- varguid-0.1.8/PUBLICATION_VALIDATION_LOG_0.1.8.txt +37 -0
- varguid-0.1.8/README.md +215 -0
- varguid-0.1.8/RELEASING.md +59 -0
- varguid-0.1.8/VALIDATION_LOG_0.1.8.txt +101 -0
- varguid-0.1.8/docs/index.md +40 -0
- varguid-0.1.8/noxfile.py +30 -0
- varguid-0.1.8/pyproject.toml +117 -0
- varguid-0.1.8/setup.cfg +4 -0
- varguid-0.1.8/src/varguid/__init__.py +17 -0
- varguid-0.1.8/src/varguid/data/cobra2d.csv +501 -0
- varguid-0.1.8/src/varguid/datasets.py +93 -0
- varguid-0.1.8/src/varguid/model.py +879 -0
- varguid-0.1.8/src/varguid/py.typed +0 -0
- varguid-0.1.8/src/varguid.egg-info/PKG-INFO +266 -0
- varguid-0.1.8/src/varguid.egg-info/SOURCES.txt +38 -0
- varguid-0.1.8/src/varguid.egg-info/dependency_links.txt +1 -0
- varguid-0.1.8/src/varguid.egg-info/requires.txt +22 -0
- varguid-0.1.8/src/varguid.egg-info/top_level.txt +1 -0
- varguid-0.1.8/tests/benchmarks/README.md +12 -0
- varguid-0.1.8/tests/benchmarks/benchmark_statsmodels.py +39 -0
- varguid-0.1.8/tests/test_basic.py +179 -0
- varguid-0.1.8/tests/test_examples.py +45 -0
- varguid-0.1.8/tests/test_r_compatibility.py +93 -0
- varguid-0.1.8/tests/test_sklearn.py +9 -0
- varguid-0.1.8/tests/test_validation.py +194 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
concurrency:
|
|
11
|
+
group: ci-${{ github.workflow }}-${{ github.ref }}
|
|
12
|
+
cancel-in-progress: true
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
tests:
|
|
16
|
+
name: Python ${{ matrix.python-version }}
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
strategy:
|
|
19
|
+
fail-fast: false
|
|
20
|
+
matrix:
|
|
21
|
+
python-version: ["3.12", "3.13", "3.14"]
|
|
22
|
+
steps:
|
|
23
|
+
- name: Check out source
|
|
24
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
cache: pip
|
|
30
|
+
- name: Install package and test dependencies
|
|
31
|
+
run: python -m pip install --upgrade pip && python -m pip install -e ".[dev]"
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: python -m pytest
|
|
34
|
+
|
|
35
|
+
quality:
|
|
36
|
+
name: Lint and type-check
|
|
37
|
+
runs-on: ubuntu-latest
|
|
38
|
+
steps:
|
|
39
|
+
- name: Check out source
|
|
40
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
41
|
+
- name: Set up Python
|
|
42
|
+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
|
43
|
+
with:
|
|
44
|
+
python-version: "3.13"
|
|
45
|
+
cache: pip
|
|
46
|
+
- name: Install package and development dependencies
|
|
47
|
+
run: python -m pip install --upgrade pip && python -m pip install -e ".[dev]"
|
|
48
|
+
- name: Ruff
|
|
49
|
+
run: ruff check src tests && ruff format --check src tests
|
|
50
|
+
- name: Mypy
|
|
51
|
+
run: mypy src tests
|
|
52
|
+
- name: Check source manifest
|
|
53
|
+
run: check-manifest
|
|
54
|
+
|
|
55
|
+
build:
|
|
56
|
+
name: Build distributions
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
needs: [tests, quality]
|
|
59
|
+
steps:
|
|
60
|
+
- name: Check out source
|
|
61
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
62
|
+
- name: Set up Python
|
|
63
|
+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
|
64
|
+
with:
|
|
65
|
+
python-version: "3.13"
|
|
66
|
+
cache: pip
|
|
67
|
+
- name: Install build tools
|
|
68
|
+
run: python -m pip install --upgrade pip build twine
|
|
69
|
+
- name: Build and validate
|
|
70
|
+
run: python -m build && python -m twine check dist/*
|
|
71
|
+
- name: Upload distributions
|
|
72
|
+
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
|
73
|
+
with:
|
|
74
|
+
name: python-package-distributions
|
|
75
|
+
path: dist/
|
|
76
|
+
if-no-files-found: error
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
name: Upload Python Package
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
release-build:
|
|
13
|
+
name: Build and validate release distributions
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- name: Check out source
|
|
18
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Install build and test tools
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
python -m pip install build twine pytest
|
|
29
|
+
python -m pip install -e ".[dev]"
|
|
30
|
+
|
|
31
|
+
- name: Run tests
|
|
32
|
+
run: python -m pytest -q
|
|
33
|
+
|
|
34
|
+
- name: Build release distributions
|
|
35
|
+
run: python -m build
|
|
36
|
+
|
|
37
|
+
- name: Check release distributions
|
|
38
|
+
run: python -m twine check dist/*
|
|
39
|
+
|
|
40
|
+
- name: Upload distributions
|
|
41
|
+
uses: actions/upload-artifact@v4  # NOTE(review): mutable tag; ci.yml pins this action to a commit SHA - pin here too, keeping it compatible with the download-artifact step
|
|
42
|
+
with:
|
|
43
|
+
name: release-dists
|
|
44
|
+
path: dist/
|
|
45
|
+
|
|
46
|
+
pypi-publish:
|
|
47
|
+
name: Publish release distributions to PyPI
|
|
48
|
+
runs-on: ubuntu-latest
|
|
49
|
+
needs:
|
|
50
|
+
- release-build
|
|
51
|
+
|
|
52
|
+
permissions:
|
|
53
|
+
id-token: write
|
|
54
|
+
contents: read
|
|
55
|
+
|
|
56
|
+
environment:
|
|
57
|
+
name: pypi
|
|
58
|
+
url: https://pypi.org/project/varguid/
|
|
59
|
+
|
|
60
|
+
steps:
|
|
61
|
+
- name: Retrieve release distributions
|
|
62
|
+
uses: actions/download-artifact@v4  # NOTE(review): mutable tag in the job that holds id-token: write; consider pinning to a full commit SHA
|
|
63
|
+
with:
|
|
64
|
+
name: release-dists
|
|
65
|
+
path: dist/
|
|
66
|
+
|
|
67
|
+
- name: Publish release distributions to PyPI
|
|
68
|
+
uses: pypa/gh-action-pypi-publish@release/v1  # NOTE(review): mutable branch ref; consider pinning to a full commit SHA as ci.yml does for its actions
|
|
69
|
+
with:
|
|
70
|
+
packages-dir: dist/
|
varguid-0.1.8/.gitignore
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Python bytecode and caches
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
.pytest_cache/
|
|
5
|
+
.mypy_cache/
|
|
6
|
+
.ruff_cache/
|
|
7
|
+
|
|
8
|
+
# Environments and local tooling
|
|
9
|
+
.venv/
|
|
10
|
+
venv/
|
|
11
|
+
.nox/
|
|
12
|
+
|
|
13
|
+
# Coverage
|
|
14
|
+
.coverage
|
|
15
|
+
.coverage.*
|
|
16
|
+
htmlcov/
|
|
17
|
+
|
|
18
|
+
# Build products and generated metadata
|
|
19
|
+
build/
|
|
20
|
+
dist/
|
|
21
|
+
*.egg-info/
|
|
22
|
+
|
|
23
|
+
# Editors and operating systems
|
|
24
|
+
.DS_Store
|
|
25
|
+
.idea/
|
|
26
|
+
.vscode/
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.15.18
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff-check
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
9
|
+
rev: v6.0.0
|
|
10
|
+
hooks:
|
|
11
|
+
- id: end-of-file-fixer
|
|
12
|
+
- id: trailing-whitespace
|
|
13
|
+
- id: check-yaml
|
|
14
|
+
- id: check-toml
|
|
15
|
+
- id: check-added-large-files
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented here.
|
|
4
|
+
|
|
5
|
+
## 0.1.8 - 2026-06-22
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Preserved DataFrame feature identity during prediction by reordering known
|
|
10
|
+
columns and rejecting missing or unexpected columns.
|
|
11
|
+
- Honored no-intercept Patsy formulas such as `y ~ 0 + x1 + x2`.
|
|
12
|
+
- Made formula fitting and prediction reject missing values instead of silently
|
|
13
|
+
dropping affected rows through Patsy.
|
|
14
|
+
- Matched the attached R non-lasso update order after the convergence threshold
|
|
15
|
+
is reached and removed variance-prediction clipping.
|
|
16
|
+
- Added a safe fallback for an exactly zero fitted variance model.
|
|
17
|
+
- Standardized lasso features and added shuffled reproducible CV folds.
|
|
18
|
+
- Added the Scientific Python `rng` interface for functional lasso fits while
|
|
19
|
+
retaining `random_state` as a deprecated compatibility alias.
|
|
20
|
+
- Corrected scikit-learn estimator behavior: mixin order, `NotFittedError`,
|
|
21
|
+
`n_features_in_`, feature-name checks, and fitted attributes.
|
|
22
|
+
- Restored keyword compatibility for `predict(object=...)` and `prd(object=...)`.
|
|
23
|
+
- Corrected the nox build session so Twine receives expanded artifact paths.
|
|
24
|
+
- Preserved the historical `generate_cobra2d(n_features < 10)` behavior by
|
|
25
|
+
expanding the generated design to ten predictors.
|
|
26
|
+
|
|
27
|
+
### Added
|
|
28
|
+
|
|
29
|
+
- Independent regression tests for the attached R non-lasso algorithm.
|
|
30
|
+
- Full scikit-learn estimator checks.
|
|
31
|
+
- README example, formula, schema-validation, lasso scale-invariance, RNG, and
|
|
32
|
+
exact-fit regression tests.
|
|
33
|
+
- A Trusted Publishing release workflow with least-privilege permissions and
|
|
34
|
+
immutable GitHub Action commit pins.
|
|
35
|
+
- A separate SLSA build-provenance attestation and verification job, plus
|
|
36
|
+
Dependabot tracking for pinned GitHub Actions.
|
|
37
|
+
- A PEP 561 `py.typed` marker and typed-package classifier.
|
|
38
|
+
- Release, contribution, fix-log, and validation documentation.
|
|
39
|
+
|
|
40
|
+
### Changed
|
|
41
|
+
|
|
42
|
+
- Adopted the publication title `{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python`.
|
|
43
|
+
- Updated canonical repository links to `zionwzz/varguid-python` and added machine-readable and BibTeX citation metadata.
|
|
44
|
+
- Updated a test annotation for compatibility with mypy 2.1.0; runtime behavior is unchanged.
|
|
45
|
+
- Raised the supported Python floor to 3.12 and refreshed core dependency floors
|
|
46
|
+
in line with the project's Scientific Python support policy.
|
|
47
|
+
- CI now targets Python 3.12, 3.13, and 3.14 and separates tests, quality checks,
|
|
48
|
+
distribution building, and publishing.
|
|
49
|
+
- Clean submitted source archives no longer contain generated caches, bytecode,
|
|
50
|
+
egg-info directories, or previously built artifacts. Standard distribution metadata
|
|
51
|
+
generated during an sdist build is retained as required by the build backend.
|
|
52
|
+
|
|
53
|
+
### Compatibility note
|
|
54
|
+
|
|
55
|
+
Sparse fits use scikit-learn's lasso solver. The implementation now mirrors the
|
|
56
|
+
R workflow more closely through standardization and CV behavior, but it is not
|
|
57
|
+
bit-for-bit equivalent to `glmnet`.
|
|
58
|
+
|
|
59
|
+
## 0.1.7
|
|
60
|
+
|
|
61
|
+
- Previous Python source release supplied for audit.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
@software{wang_lu_2026_varguid_python,
|
|
2
|
+
author = {Wang, Zihao and Lu, Min},
|
|
3
|
+
title = {{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python},
|
|
4
|
+
version = {0.1.8},
|
|
5
|
+
year = {2026},
|
|
6
|
+
publisher = {Zenodo},
|
|
7
|
+
doi = {10.5281/zenodo.20816141},
|
|
8
|
+
url = {https://doi.org/10.5281/zenodo.20816141}
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
@article{liu_lu_2026_varguid,
|
|
12
|
+
author = {Liu, Sibei and Lu, Min},
|
|
13
|
+
title = {Variance-Guided Regression for Heteroscedastic Data With a Grouping-Based Extension for Nonlinear Prediction},
|
|
14
|
+
journal = {Statistics in Medicine},
|
|
15
|
+
volume = {45},
|
|
16
|
+
number = {13-14},
|
|
17
|
+
pages = {e70632},
|
|
18
|
+
year = {2026},
|
|
19
|
+
doi = {10.1002/sim.70632}
|
|
20
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use varguid, please cite both this software and the method paper listed in references."
|
|
3
|
+
type: software
|
|
4
|
+
title: "{varguid}: Variance-Guided Regression Improving Upon OLS and ANOVA for Python"
|
|
5
|
+
version: 0.1.8
|
|
6
|
+
doi: "10.5281/zenodo.20816141"
|
|
7
|
+
date-released: "2026-06-23"
|
|
8
|
+
authors:
|
|
9
|
+
- family-names: Wang
|
|
10
|
+
given-names: Zihao
|
|
11
|
+
- family-names: Lu
|
|
12
|
+
given-names: Min
|
|
13
|
+
abstract: >-
|
|
14
|
+
A Python implementation of variance-guided regression for heteroscedastic
|
|
15
|
+
data, including baseline and variance-guided linear prediction, optional
|
|
16
|
+
lasso regularization, formula support, coefficient inference for non-lasso
|
|
17
|
+
fits, and a scikit-learn-compatible estimator.
|
|
18
|
+
repository-code: "https://github.com/zionwzz/varguid-python"
|
|
19
|
+
url: "https://github.com/zionwzz/varguid-python"
|
|
20
|
+
license: GPL-2.0-or-later
|
|
21
|
+
keywords:
|
|
22
|
+
- regression
|
|
23
|
+
- heteroscedasticity
|
|
24
|
+
- weighted least squares
|
|
25
|
+
- mean-variance modeling
|
|
26
|
+
- lasso
|
|
27
|
+
references:
|
|
28
|
+
- type: article
|
|
29
|
+
authors:
|
|
30
|
+
- family-names: Liu
|
|
31
|
+
given-names: Sibei
|
|
32
|
+
- family-names: Lu
|
|
33
|
+
given-names: Min
|
|
34
|
+
title: "Variance-Guided Regression for Heteroscedastic Data With a Grouping-Based Extension for Nonlinear Prediction"
|
|
35
|
+
journal: "Statistics in Medicine"
|
|
36
|
+
volume: 45
|
|
37
|
+
issue: "13-14"
|
|
38
|
+
start: e70632
|
|
39
|
+
year: 2026
|
|
40
|
+
doi: 10.1002/sim.70632
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
## Development environment
|
|
4
|
+
|
|
5
|
+
Create and activate a Python 3.12+ virtual environment, then install the package
|
|
6
|
+
and development dependencies:
|
|
7
|
+
|
|
8
|
+
```bash
|
|
9
|
+
python -m pip install --upgrade pip
|
|
10
|
+
python -m pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Required checks
|
|
14
|
+
|
|
15
|
+
Run these commands from the repository root before submitting a change:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
python -m pytest
|
|
19
|
+
ruff check src tests
|
|
20
|
+
ruff format --check src tests
|
|
21
|
+
mypy src tests
|
|
22
|
+
check-manifest
|
|
23
|
+
python -m build
|
|
24
|
+
python -m twine check dist/*
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The same checks are grouped into nox sessions:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
nox -s tests lint build
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Install the local hooks once with `pre-commit install`, then run
|
|
34
|
+
`pre-commit run --all-files` before committing.
|
|
35
|
+
|
|
36
|
+
## Regression expectations
|
|
37
|
+
|
|
38
|
+
Changes to the fitting algorithm should include tests for:
|
|
39
|
+
|
|
40
|
+
- agreement with the update order in the attached R `R/irls.R` implementation;
|
|
41
|
+
- DataFrame feature-name safety;
|
|
42
|
+
- formula intercept behavior;
|
|
43
|
+
- lasso reproducibility and scaling behavior, when applicable; and
|
|
44
|
+
- the scikit-learn estimator contract.
|
|
45
|
+
|
|
46
|
+
Sparse fits use scikit-learn rather than R's `glmnet`; document any intentional
|
|
47
|
+
behavioral difference instead of asserting bit-for-bit cross-language parity.
|
|
48
|
+
|
|
49
|
+
## Releases
|
|
50
|
+
|
|
51
|
+
Update both `pyproject.toml` and `src/varguid/__init__.py`, add an entry to
|
|
52
|
+
`CHANGELOG.md`, and follow `RELEASING.md`. Do not upload from a developer
|
|
53
|
+
machine when the Trusted Publishing workflow is available.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# varguid 0.1.8 fix log
|
|
2
|
+
|
|
3
|
+
Audit date: 2026-06-22
|
|
4
|
+
|
|
5
|
+
Inputs reviewed:
|
|
6
|
+
|
|
7
|
+
- Python source archive `varguid_py_0.1.7_source.zip`.
|
|
8
|
+
- R source package `varGuid_0.1.5.tar.gz`, especially `R/irls.R` and `R/add.R`.
|
|
9
|
+
|
|
10
|
+
## Corrections
|
|
11
|
+
|
|
12
|
+
### 1. DataFrame prediction schema
|
|
13
|
+
|
|
14
|
+
**Problem:** prediction used positional columns. Reordering the same DataFrame
|
|
15
|
+
columns silently changed predictions.
|
|
16
|
+
|
|
17
|
+
**Correction:** fitted DataFrame names are retained. Prediction reorders a
|
|
18
|
+
complete matching schema and raises a clear error for missing, additional, or
|
|
19
|
+
ambiguous string-equivalent names.
|
|
20
|
+
|
|
21
|
+
**Regression coverage:** `tests/test_validation.py`.
|
|
22
|
+
|
|
23
|
+
### 2. Formula intercept handling
|
|
24
|
+
|
|
25
|
+
**Problem:** `lmv_formula("y ~ 0 + x1 + x2", ...)` still inserted an intercept.
|
|
26
|
+
|
|
27
|
+
**Correction:** the Patsy design matrix determines `fit_intercept`; no-intercept
|
|
28
|
+
formulas now remain no-intercept during fitting, summaries, and prediction.
|
|
29
|
+
Design information is retained for transformed and categorical predictors.
|
|
30
|
+
|
|
31
|
+
**Regression coverage:** `tests/test_basic.py` and formula prediction tests.
|
|
32
|
+
|
|
33
|
+
Formula fitting and prediction now use Patsy's `NA_action="raise"`, preventing
|
|
34
|
+
missing values from silently reducing the number of fitted or predicted rows.
|
|
35
|
+
|
|
36
|
+
### 3. Non-lasso R algorithm parity
|
|
37
|
+
|
|
38
|
+
**Problem:** the Python implementation clipped fitted variance values and used a
|
|
39
|
+
different model update order once the coefficient-change threshold was met.
|
|
40
|
+
|
|
41
|
+
**Correction:** variance fits are no longer clipped. Candidate mean models are
|
|
42
|
+
assigned before the convergence branch, matching the attached `R/irls.R`
|
|
43
|
+
control flow. The last committed model remains the returned variance-guided fit.
|
|
44
|
+
An exactly zero variance fit receives a finite uniform-weight fallback.
|
|
45
|
+
|
|
46
|
+
**Regression coverage:** `tests/test_r_compatibility.py` contains an independent
|
|
47
|
+
translation of the attached R non-lasso procedure and exercises the
|
|
48
|
+
post-convergence branch.
|
|
49
|
+
|
|
50
|
+
### 4. Lasso scaling and randomness
|
|
51
|
+
|
|
52
|
+
**Problem:** predictors were sent to `LassoCV` without glmnet-like
|
|
53
|
+
standardization, and `random_state` had no practical effect under the previous
|
|
54
|
+
solver configuration.
|
|
55
|
+
|
|
56
|
+
**Correction:** mean and variance predictors are standardized using the
|
|
57
|
+
applicable sample weights, coefficients are mapped back to original units, and
|
|
58
|
+
cross-validation uses shuffled `KFold` splits generated from a deterministic
|
|
59
|
+
seed stream. The functional API now accepts the Scientific Python `rng`
|
|
60
|
+
keyword, normalizes it with `numpy.random.default_rng`, and retains
|
|
61
|
+
`random_state` as a deprecated alias. The scikit-learn estimator keeps its
|
|
62
|
+
conventional `random_state` parameter. The R default of 3 folds for at most 80
|
|
63
|
+
rows and 10 otherwise is retained when `cv_folds=10`.
|
|
64
|
+
|
|
65
|
+
**Limitation:** scikit-learn and `glmnet` use different solvers and lambda-grid
|
|
66
|
+
implementations. Sparse coefficients are reproducible within Python but are not
|
|
67
|
+
claimed to be bit-for-bit identical to R.
|
|
68
|
+
|
|
69
|
+
**Regression coverage:** lasso smoke, reproducibility, and unit-invariance tests.
|
|
70
|
+
|
|
71
|
+
### 5. Scikit-learn estimator contract
|
|
72
|
+
|
|
73
|
+
**Problem:** the wrapper had the wrong mixin order, raised the wrong pre-fit
|
|
74
|
+
exception, omitted `n_features_in_`, and did not fully validate prediction
|
|
75
|
+
features.
|
|
76
|
+
|
|
77
|
+
**Correction:** `VarGuidRegressor` now inherits `RegressorMixin` before
|
|
78
|
+
`BaseEstimator`, uses scikit-learn validation helpers, records standard fitted
|
|
79
|
+
attributes, and raises `NotFittedError` before fit.
|
|
80
|
+
|
|
81
|
+
**Regression coverage:** the full `sklearn.utils.estimator_checks.check_estimator`
|
|
82
|
+
suite plus focused wrapper tests.
|
|
83
|
+
|
|
84
|
+
### 6. Packaging, CI, and release security
|
|
85
|
+
|
|
86
|
+
The Patsy lower bound was raised from `1.0` to `1.0.1` because Patsy 1.0.0 is a
|
|
87
|
+
yanked release.
|
|
88
|
+
|
|
89
|
+
**Problem:** the supplied archive included old `dist/`, cache folders, bytecode,
|
|
90
|
+
and egg-info; configured lint/type checks failed; release instructions relied on
|
|
91
|
+
manual token-based uploads.
|
|
92
|
+
|
|
93
|
+
**Correction:** lint and type issues were resolved, the nox build glob was
|
|
94
|
+
corrected, source packaging was cleaned, and CI now tests supported Python
|
|
95
|
+
versions. Release publishing uses a separate least-privilege OIDC job through a
|
|
96
|
+
protected `pypi` environment. Third-party actions are pinned to full immutable
|
|
97
|
+
commit SHAs and tracked by Dependabot. A separate credential-limited job
|
|
98
|
+
generates and verifies SLSA build-provenance attestations before the PyPI
|
|
99
|
+
publishing job can run. The wheel includes a PEP 561 `py.typed` marker.
|
|
100
|
+
|
|
101
|
+
### 7. Synthetic-data compatibility
|
|
102
|
+
|
|
103
|
+
**Problem:** an early validation change rejected `n_features < 10`, whereas the
|
|
104
|
+
0.1.7 helper and attached R generator expand such requests to ten predictors.
|
|
105
|
+
|
|
106
|
+
**Correction:** positive feature counts below ten are again expanded to ten.
|
|
107
|
+
Input validation, the preferred `rng` interface, and valid negative
|
|
108
|
+
equicorrelation support are retained.
|
|
109
|
+
|
|
110
|
+
**Regression coverage:** `tests/test_validation.py`.
|
|
111
|
+
|
|
112
|
+
## Validation scope
|
|
113
|
+
|
|
114
|
+
The final release was validated with pytest, Ruff lint and format checks, mypy,
|
|
115
|
+
package build, Twine metadata checks, archive-content inspection, wheel
|
|
116
|
+
installation, source-distribution installation, and installed-package execution
|
|
117
|
+
of the documented examples. Exact commands and outputs are recorded in
|
|
118
|
+
`VALIDATION_LOG_0.1.8.txt`.
|
|
119
|
+
|
|
120
|
+
The available local interpreter was CPython 3.13.5. Python 3.12 and 3.14 are
|
|
121
|
+
configured in CI but were not available for local execution in this audit.
|
|
122
|
+
An R interpreter was not available locally; non-lasso parity was checked against
|
|
123
|
+
an independent Python translation of the attached R source rather than by
|
|
124
|
+
executing R itself.
|