stathead 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stathead-0.1.0/.gitignore +55 -0
- stathead-0.1.0/PKG-INFO +121 -0
- stathead-0.1.0/README.md +93 -0
- stathead-0.1.0/pyproject.toml +40 -0
- stathead-0.1.0/src/stathead/__init__.py +43 -0
- stathead-0.1.0/src/stathead/_fetch.py +89 -0
- stathead-0.1.0/src/stathead/adp.py +67 -0
- stathead-0.1.0/src/stathead/features.py +27 -0
- stathead-0.1.0/src/stathead/ktc.py +66 -0
- stathead-0.1.0/src/stathead/predictions.py +46 -0
- stathead-0.1.0/src/stathead/prospects.py +18 -0
- stathead-0.1.0/tests/test_smoke.py +95 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
node_modules
|
|
2
|
+
dist
|
|
3
|
+
.DS_Store
|
|
4
|
+
*.local
|
|
5
|
+
public/data/*
|
|
6
|
+
!public/data/training-rows-cache-*.json
|
|
7
|
+
!public/data/trained-models-cache-*.json
|
|
8
|
+
!public/data/model-cache-*.json
|
|
9
|
+
!public/data/feature-store/
|
|
10
|
+
!public/data/score-store/
|
|
11
|
+
!public/data/ktc_rankings_*.json
|
|
12
|
+
!public/data/ktc_history.json
|
|
13
|
+
!public/data/nflverse_weekly_2025.json
|
|
14
|
+
!public/data/ppg-feature-ablation.json
|
|
15
|
+
!public/data/ppg-hyperparam-sweep.json
|
|
16
|
+
!public/data/residual-feature-ablation.json
|
|
17
|
+
!public/data/residual-hyperparam-sweep.json
|
|
18
|
+
!public/data/ktc-feature-ablation.json
|
|
19
|
+
!public/data/ktc-hyperparam-sweep.json
|
|
20
|
+
!public/data/pdf-career-ablation-predraft.json
|
|
21
|
+
!public/data/pdf-career-ablation-postdraft.json
|
|
22
|
+
!public/data/rsp-career-ablation-predraft.json
|
|
23
|
+
!public/data/rsp-career-ablation-postdraft.json
|
|
24
|
+
!public/data/pdf-only-career-test.json
|
|
25
|
+
!public/data/qb-beast-ablation.json
|
|
26
|
+
!public/data/ktc-forecasts-*.json
|
|
27
|
+
!public/data/redraft-projections.json
|
|
28
|
+
!public/data/cfbd-college-stats.json
|
|
29
|
+
!public/data/cfbd-sp-ratings.json
|
|
30
|
+
!public/data/cfbd-recruiting.json
|
|
31
|
+
!public/data/cfbd-games.json
|
|
32
|
+
!public/data/cfbd-team-talent.json
|
|
33
|
+
!public/data/cfbd-player-usage.json
|
|
34
|
+
!public/data/cfbd/
|
|
35
|
+
!public/data/cfbd/*.json
|
|
36
|
+
!public/data/pdf-prospect-features.json
|
|
37
|
+
!public/data/pdf-prospect-features-merged.json
|
|
38
|
+
!public/data/rsp-historical-rankings.json
|
|
39
|
+
!public/data/rsp-qb-features.json
|
|
40
|
+
!public/data/manual-cfbd-overrides.json
|
|
41
|
+
# Raw upstream sources committed as .csv.gz (~35 MB vs ~200 MB raw).
|
|
42
|
+
# TS readLocalFile auto-decompresses; Python runs `bash scripts/extract-data.sh`
|
|
43
|
+
# once to materialize the .csv. See scripts/pull-all-data-sources.sh.
|
|
44
|
+
!public/data/*.csv.gz
|
|
45
|
+
!public/data/ffc_adp_ppr_*.json
|
|
46
|
+
!public/data/espn_adp_*.json
|
|
47
|
+
.claude/*
|
|
48
|
+
!.claude/commands/
|
|
49
|
+
!.claude/commands/**
|
|
50
|
+
CLAUDE.local.md
|
|
51
|
+
__pycache__/
|
|
52
|
+
.nflverse-cache/
|
|
53
|
+
.cache/
|
|
54
|
+
.venv/
|
|
55
|
+
pdfs/
|
stathead-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stathead
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for the StatHead fantasy football model — rookie career predictions, historical ADP, dynasty values, and flattened feature matrices.
|
|
5
|
+
Project-URL: Homepage, https://github.com/dachhack/stathead
|
|
6
|
+
Project-URL: Source, https://github.com/dachhack/stathead
|
|
7
|
+
Project-URL: Issues, https://github.com/dachhack/stathead/issues
|
|
8
|
+
Author: StatHead
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: adp,dynasty,fantasy-football,ktc,nfl,rookie-prospects
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Requires-Dist: pandas>=1.5
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: duckdb>=0.10; extra == 'dev'
|
|
21
|
+
Requires-Dist: polars>=0.20; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
23
|
+
Provides-Extra: duckdb
|
|
24
|
+
Requires-Dist: duckdb>=0.10; extra == 'duckdb'
|
|
25
|
+
Provides-Extra: polars
|
|
26
|
+
Requires-Dist: polars>=0.20; extra == 'polars'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# stathead
|
|
30
|
+
|
|
31
|
+
Python client for the [StatHead](https://github.com/dachhack/stathead) fantasy football model. Returns pandas DataFrames of rookie career predictions, historical ADP, KeepTradeCut dynasty values, and the flattened feature matrix used to train the models.
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install stathead
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Optional extras:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install "stathead[polars]" # for .to_polars() helpers
|
|
43
|
+
pip install "stathead[duckdb]" # for local SQL querying
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import stathead as sh
|
|
50
|
+
|
|
51
|
+
# 2026 rookie class predictions (77 players × ~80 columns)
|
|
52
|
+
rookies = sh.load_career_predictions_2026()
|
|
53
|
+
rookies.nlargest(10, "percentile")[["name", "position", "predictedCareerPPG", "modelTier"]]
|
|
54
|
+
|
|
55
|
+
# Historical backtest — predicted vs actual for every drafted rookie 2010-2025
|
|
56
|
+
backtest = sh.load_career_backtest()
|
|
57
|
+
wr = backtest[backtest.position == "WR"]
|
|
58
|
+
wr.groupby("modelTier")[["actualPPG", "predictedPPG"]].mean()
|
|
59
|
+
|
|
60
|
+
# Historical ADP, every season fully populated
|
|
61
|
+
adp = sh.load_adp_historical()
|
|
62
|
+
adp[(adp.season == 2023) & (adp.adp <= 24)]
|
|
63
|
+
|
|
64
|
+
# Dynasty values
|
|
65
|
+
ktc = sh.load_ktc()
|
|
66
|
+
ktc.nlargest(25, "value_1qb")
|
|
67
|
+
|
|
68
|
+
# Daily KTC history (for momentum / trend analysis)
|
|
69
|
+
hist = sh.load_ktc_history()
|
|
70
|
+
hist.pivot_table(index="date", columns="name", values="value_1qb")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Pinning to a specific version
|
|
74
|
+
|
|
75
|
+
Loaders resolve against the upstream GitHub repo. Pin to a commit SHA, tag,
|
|
76
|
+
or branch for reproducibility:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
sh.pin_version("a6720e5") # or a tagged release
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Clear the local cache if you want to re-fetch:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
sh.clear_cache()
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Data freshness
|
|
89
|
+
|
|
90
|
+
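Data files are cached under `~/.cache/stathead/<ref>/` (or `$XDG_CACHE_HOME/stathead/<ref>/` when that variable is set; the platform cache directory is used instead if `platformdirs` is installed) after the first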
|
|
91
|
+
download. Subsequent runs read from disk — no network roundtrip. Delete the
|
|
92
|
+
cache directory or call `clear_cache()` to force a refresh.
|
|
93
|
+
|
|
94
|
+
## Available loaders
|
|
95
|
+
|
|
96
|
+
| Function | Returns | Shape |
|
|
97
|
+
|---|---|---|
|
|
98
|
+
| `load_career_predictions_2026()` | 2026 rookie predictions | ~77 × ~80 cols |
|
|
99
|
+
| `load_career_backtest()` | Historical rookies with pred + actual PPG | ~1087 × ~100 cols |
|
|
100
|
+
| `load_adp_historical()` | Model-training ADP 2010-2025 | 4507 × 10 |
|
|
101
|
+
| `load_adp_ffc(season=None)` | FFC PPR raw ADP (per season as fetched) | variable |
|
|
102
|
+
| `load_ktc()` | Current KTC dynasty values | ~500 × 9 |
|
|
103
|
+
| `load_ktc_history()` | Daily KTC history | ~100k × 7 |
|
|
104
|
+
| `load_prospect_grades(year=2026)` | Draft scouting grades | ~200 × 7 |
|
|
105
|
+
| `load_feature_matrix()` | Raw `feature-matrix.json` (dict) | — |
|
|
106
|
+
| `load_manual_overrides()` | Manual CFBD usage overrides (dict) | — |
|
|
107
|
+
|
|
108
|
+
## Licensing & attribution
|
|
109
|
+
|
|
110
|
+
Package code is MIT-licensed. The data this package retrieves is derived
|
|
111
|
+
from the StatHead project's own modeling pipeline; upstream sources
|
|
112
|
+
(nflverse, FFC, KeepTradeCut, CFBD, etc.) retain their own terms — see
|
|
113
|
+
each source's license before redistributing. If you're building on these
|
|
114
|
+
predictions, a link back to the StatHead repo is appreciated but not
|
|
115
|
+
required.
|
|
116
|
+
|
|
117
|
+
## Contributing
|
|
118
|
+
|
|
119
|
+
The package is small and focused — see
|
|
120
|
+
[`python/src/stathead/`](./src/stathead/) for the loader modules. Issues
|
|
121
|
+
and PRs welcome at the [main repo](https://github.com/dachhack/stathead).
|
stathead-0.1.0/README.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# stathead
|
|
2
|
+
|
|
3
|
+
Python client for the [StatHead](https://github.com/dachhack/stathead) fantasy football model. Returns pandas DataFrames of rookie career predictions, historical ADP, KeepTradeCut dynasty values, and the flattened feature matrix used to train the models.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install stathead
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Optional extras:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install "stathead[polars]" # for .to_polars() helpers
|
|
15
|
+
pip install "stathead[duckdb]" # for local SQL querying
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quick start
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import stathead as sh
|
|
22
|
+
|
|
23
|
+
# 2026 rookie class predictions (77 players × ~80 columns)
|
|
24
|
+
rookies = sh.load_career_predictions_2026()
|
|
25
|
+
rookies.nlargest(10, "percentile")[["name", "position", "predictedCareerPPG", "modelTier"]]
|
|
26
|
+
|
|
27
|
+
# Historical backtest — predicted vs actual for every drafted rookie 2010-2025
|
|
28
|
+
backtest = sh.load_career_backtest()
|
|
29
|
+
wr = backtest[backtest.position == "WR"]
|
|
30
|
+
wr.groupby("modelTier")[["actualPPG", "predictedPPG"]].mean()
|
|
31
|
+
|
|
32
|
+
# Historical ADP, every season fully populated
|
|
33
|
+
adp = sh.load_adp_historical()
|
|
34
|
+
adp[(adp.season == 2023) & (adp.adp <= 24)]
|
|
35
|
+
|
|
36
|
+
# Dynasty values
|
|
37
|
+
ktc = sh.load_ktc()
|
|
38
|
+
ktc.nlargest(25, "value_1qb")
|
|
39
|
+
|
|
40
|
+
# Daily KTC history (for momentum / trend analysis)
|
|
41
|
+
hist = sh.load_ktc_history()
|
|
42
|
+
hist.pivot_table(index="date", columns="name", values="value_1qb")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Pinning to a specific version
|
|
46
|
+
|
|
47
|
+
Loaders resolve against the upstream GitHub repo. Pin to a commit SHA, tag,
|
|
48
|
+
or branch for reproducibility:
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
sh.pin_version("a6720e5") # or a tagged release
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Clear the local cache if you want to re-fetch:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
sh.clear_cache()
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Data freshness
|
|
61
|
+
|
|
62
|
+
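Data files are cached under `~/.cache/stathead/<ref>/` (or `$XDG_CACHE_HOME/stathead/<ref>/` when that variable is set; the platform cache directory is used instead if `platformdirs` is installed) after the first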
|
|
63
|
+
download. Subsequent runs read from disk — no network roundtrip. Delete the
|
|
64
|
+
cache directory or call `clear_cache()` to force a refresh.
|
|
65
|
+
|
|
66
|
+
## Available loaders
|
|
67
|
+
|
|
68
|
+
| Function | Returns | Shape |
|
|
69
|
+
|---|---|---|
|
|
70
|
+
| `load_career_predictions_2026()` | 2026 rookie predictions | ~77 × ~80 cols |
|
|
71
|
+
| `load_career_backtest()` | Historical rookies with pred + actual PPG | ~1087 × ~100 cols |
|
|
72
|
+
| `load_adp_historical()` | Model-training ADP 2010-2025 | 4507 × 10 |
|
|
73
|
+
| `load_adp_ffc(season=None)` | FFC PPR raw ADP (per season as fetched) | variable |
|
|
74
|
+
| `load_ktc()` | Current KTC dynasty values | ~500 × 9 |
|
|
75
|
+
| `load_ktc_history()` | Daily KTC history | ~100k × 7 |
|
|
76
|
+
| `load_prospect_grades(year=2026)` | Draft scouting grades | ~200 × 7 |
|
|
77
|
+
| `load_feature_matrix()` | Raw `feature-matrix.json` (dict) | — |
|
|
78
|
+
| `load_manual_overrides()` | Manual CFBD usage overrides (dict) | — |
|
|
79
|
+
|
|
80
|
+
## Licensing & attribution
|
|
81
|
+
|
|
82
|
+
Package code is MIT-licensed. The data this package retrieves is derived
|
|
83
|
+
from the StatHead project's own modeling pipeline; upstream sources
|
|
84
|
+
(nflverse, FFC, KeepTradeCut, CFBD, etc.) retain their own terms — see
|
|
85
|
+
each source's license before redistributing. If you're building on these
|
|
86
|
+
predictions, a link back to the StatHead repo is appreciated but not
|
|
87
|
+
required.
|
|
88
|
+
|
|
89
|
+
## Contributing
|
|
90
|
+
|
|
91
|
+
The package is small and focused — see
|
|
92
|
+
[`python/src/stathead/`](./src/stathead/) for the loader modules. Issues
|
|
93
|
+
and PRs welcome at the [main repo](https://github.com/dachhack/stathead).
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "stathead"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Python client for the StatHead fantasy football model — rookie career predictions, historical ADP, dynasty values, and flattened feature matrices."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "StatHead" }]
|
|
13
|
+
keywords = ["fantasy-football", "nfl", "dynasty", "rookie-prospects", "adp", "ktc"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
20
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"pandas>=1.5",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
polars = ["polars>=0.20"]
|
|
28
|
+
duckdb = ["duckdb>=0.10"]
|
|
29
|
+
dev = ["pytest>=7", "polars>=0.20", "duckdb>=0.10"]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/dachhack/stathead"
|
|
33
|
+
Source = "https://github.com/dachhack/stathead"
|
|
34
|
+
Issues = "https://github.com/dachhack/stathead/issues"
|
|
35
|
+
|
|
36
|
+
[tool.hatch.build.targets.wheel]
|
|
37
|
+
packages = ["src/stathead"]
|
|
38
|
+
|
|
39
|
+
[tool.pytest.ini_options]
|
|
40
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""StatHead — Python client for the fantasy football model.
|
|
2
|
+
|
|
3
|
+
Loaders return pandas DataFrames sourced from the upstream repo at
|
|
4
|
+
https://github.com/dachhack/stathead. First call downloads + caches the
|
|
5
|
+
underlying JSON/CSV; subsequent calls read from the cache.
|
|
6
|
+
|
|
7
|
+
Quick start:
|
|
8
|
+
|
|
9
|
+
import stathead as sh
|
|
10
|
+
|
|
11
|
+
rookies = sh.load_career_predictions_2026()
|
|
12
|
+
backtest = sh.load_career_backtest()
|
|
13
|
+
adp = sh.load_adp_historical()
|
|
14
|
+
ktc = sh.load_ktc()
|
|
15
|
+
|
|
16
|
+
Pin to a specific commit for reproducibility:
|
|
17
|
+
|
|
18
|
+
sh.pin_version("a6720e5") # or any git ref/tag/SHA
|
|
19
|
+
"""
|
|
20
|
+
from ._fetch import clear_cache, pin_version, set_ref
|
|
21
|
+
from .adp import load_adp_ffc, load_adp_historical
|
|
22
|
+
from .features import load_feature_matrix, load_manual_overrides
|
|
23
|
+
from .ktc import load_ktc, load_ktc_history
|
|
24
|
+
from .predictions import load_career_backtest, load_career_predictions_2026
|
|
25
|
+
from .prospects import load_prospect_grades
|
|
26
|
+
|
|
27
|
+
# Package version string (pyproject.toml declares the same "0.1.0").
__version__ = "0.1.0"

# Names exported by ``from stathead import *``.
__all__ = [
    # package metadata
    "__version__",
    # cache / git-ref control
    "clear_cache", "pin_version", "set_ref",
    # data loaders
    "load_adp_ffc", "load_adp_historical",
    "load_career_backtest", "load_career_predictions_2026",
    "load_feature_matrix",
    "load_ktc", "load_ktc_history",
    "load_manual_overrides",
    "load_prospect_grades",
]
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Fetch + on-disk cache for files hosted in the stathead repo.
|
|
2
|
+
|
|
3
|
+
All loaders go through :func:`fetch_json` / :func:`fetch_csv_gz`, which
|
|
4
|
+
resolve the URL against the currently-pinned git ref and cache the raw
|
|
5
|
+
bytes under ``<cache root>/<ref>/<path>`` (``~/.cache/stathead`` unless ``platformdirs`` or ``XDG_CACHE_HOME`` says otherwise).
|
|
6
|
+
|
|
7
|
+
Override the ref via :func:`pin_version` (per-session) or the
|
|
8
|
+
``STATHEAD_REF`` env var (per-shell).
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import gzip
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import shutil
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
from urllib.request import Request, urlopen
|
|
19
|
+
|
|
20
|
+
# Root of the raw-content URLs; fetches request ``{_BASE_URL}/{ref}/{path}``.
_BASE_URL = "https://raw.githubusercontent.com/dachhack/stathead"
# Git ref used when nothing else is configured.
_DEFAULT_REF = "main"
# Active ref for all fetches. ``STATHEAD_REF`` overrides the default; an empty
# value is treated as unset because an empty ref would produce a malformed URL
# and a cache path that collapses onto the cache root.
_ref: str = os.environ.get("STATHEAD_REF") or _DEFAULT_REF
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def pin_version(ref: str) -> None:
    """Make every subsequent loader fetch from ``ref``.

    ``ref`` may be a commit SHA, a tag, or a branch name; pass ``"main"``
    to go back to the latest data. Delegates to :func:`set_ref`.
    """
    set_ref(ref)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def set_ref(ref: str) -> None:
    """Set the git ref (commit SHA, tag, or branch) used by later fetches.

    :func:`pin_version` is a thin wrapper around this function.

    Args:
        ref: Non-empty git ref; it becomes part of both the download URL
            and the on-disk cache path.

    Raises:
        ValueError: If ``ref`` is not a string or is empty/blank. Such a
            value would silently produce a broken URL and cache location.
    """
    global _ref
    if not isinstance(ref, str) or not ref.strip():
        raise ValueError(f"ref must be a non-empty string, got {ref!r}")
    _ref = ref
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def current_ref() -> str:
    """Report which git ref fetches are currently resolved against."""
    return _ref
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _cache_root() -> Path:
    """Return the directory that holds the per-ref cache folders.

    Uses ``platformdirs.user_cache_dir("stathead")`` when ``platformdirs``
    can be imported. Otherwise falls back to ``$XDG_CACHE_HOME/stathead``
    if that variable is set and non-empty, else ``~/.cache/stathead``.
    """
    try:
        from platformdirs import user_cache_dir  # type: ignore

        return Path(user_cache_dir("stathead"))
    except ImportError:
        xdg_home = os.environ.get("XDG_CACHE_HOME")
        base = Path(xdg_home) if xdg_home else Path.home() / ".cache"
        return base / "stathead"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _cache_path(path: str) -> Path:
    """Map a repo-relative ``path`` to its cache file under the active ref."""
    return _cache_root().joinpath(_ref, path)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def clear_cache(ref: str | None = None) -> None:
    """Remove cached files for the given ref (default: current ref).

    Pass ``"*"`` to remove the entire cache directory.

    Args:
        ref: Ref whose cache folder should be deleted, ``"*"`` for
            everything, or ``None``/empty to use the current ref.

    Raises:
        ValueError: If the ref does not resolve to a directory strictly
            inside the cache root.
    """
    base = _cache_root()
    if ref == "*":
        target = base
    else:
        name = ref or _ref
        target = base / name
        # A ref such as ".." or an absolute path would aim rmtree at a
        # directory outside the cache (for example the whole ~/.cache);
        # refuse rather than delete unrelated files.
        if base.resolve() not in target.resolve().parents:
            raise ValueError(
                f"ref {name!r} does not name a directory inside the cache"
            )
    if target.exists():
        shutil.rmtree(target)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _fetch(path: str) -> bytes:
    """Return the raw bytes of ``path`` at the active ref, using the cache.

    A cache hit is served from disk with no network access. On a miss the
    file is downloaded from ``{_BASE_URL}/{ref}/{path}`` and stored in the
    cache before being returned.

    Args:
        path: Repo-relative file path, e.g. ``"public/data/x.json"``.

    Returns:
        The file contents exactly as downloaded (no decoding).

    Raises:
        OSError: On network or filesystem failures (``urllib`` errors are
            ``OSError`` subclasses).
    """
    cache = _cache_path(path)
    if cache.exists():
        return cache.read_bytes()
    url = f"{_BASE_URL}/{_ref}/{path}"
    req = Request(url, headers={"User-Agent": "stathead-py"})
    with urlopen(req, timeout=60) as resp:  # noqa: S310 — trusted domain
        data = resp.read()
    cache.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it into place. A crash or
    # Ctrl-C mid-write must not leave a truncated file at ``cache``, because
    # later calls would treat it as a valid cache hit forever.
    tmp = cache.with_name(f"{cache.name}.{os.getpid()}.tmp")
    try:
        tmp.write_bytes(data)
        os.replace(tmp, cache)
    finally:
        # No-op after a successful rename; removes the partial file otherwise.
        tmp.unlink(missing_ok=True)
    return data
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def fetch_json(path: str) -> Any:
    """Download (or read from cache) a JSON file and return the parsed value.

    The bytes are decoded as UTF-8 before parsing.
    """
    raw = _fetch(path)
    text = raw.decode("utf-8")
    return json.loads(text)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def fetch_csv_gz(path: str) -> bytes:
    """Download (or read from cache) a gzip file and return its inflated bytes."""
    compressed = _fetch(path)
    return gzip.decompress(compressed)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""ADP loaders — historical (all seasons, model training data) + FFC raw."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from ._fetch import fetch_json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_adp_historical() -> pd.DataFrame:
    """Historical ADP used in model training.

    Built from the feature-store ``profile`` and ``players`` shards.
    Profile keys of the form ``"<name_norm>::<season>"`` become one row
    each; keys without ``::`` or with a non-integer season are skipped.
    Display name and position come from the matching ``players`` entry
    when there is one (the name falls back to ``name_norm``).

    Upstream describes this data as normalized across sources and fully
    populated for 2010-2025.

    Columns: ``season``, ``name``, ``name_norm``, ``position``, ``adp``,
    ``adpRound``, ``nflDraftPick``, ``nflDraftRound``, ``age``,
    ``yearsInLeague``.

    Returns:
        Rows sorted by ``season`` then ``adp`` (missing ADP last) with a
        fresh index. If no profile key qualifies, an empty frame that
        still carries the columns above.
    """
    profile = fetch_json("public/data/feature-store/profile.json")
    players = fetch_json("public/data/feature-store/players.json")
    columns = [
        "season", "name", "name_norm", "position", "adp",
        "adpRound", "nflDraftPick", "nflDraftRound", "age", "yearsInLeague",
    ]
    rows: list[dict] = []
    for key, rec in profile.items():
        if "::" not in key:
            continue
        # rsplit: only the last "::" separates the season from the name.
        name_norm, season_str = key.rsplit("::", 1)
        try:
            season = int(season_str)
        except ValueError:
            continue
        info = players.get(key) or {}
        rows.append({
            "season": season,
            "name": info.get("displayName", name_norm),
            "name_norm": name_norm,
            "position": info.get("position"),
            "adp": rec.get("adp"),
            "adpRound": rec.get("adpRound"),
            "nflDraftPick": rec.get("nflDraftPick"),
            "nflDraftRound": rec.get("nflDraftRound"),
            "age": rec.get("age"),
            "yearsInLeague": rec.get("yearsInLeague"),
        })
    # Explicit columns keep the schema, and the sort below, valid when
    # ``rows`` is empty; a column-less frame would raise KeyError on "season".
    frame = pd.DataFrame(rows, columns=columns)
    return frame.sort_values(["season", "adp"], na_position="last").reset_index(drop=True)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def load_adp_ffc(season: int | None = None) -> pd.DataFrame:
    """FantasyFootballCalculator PPR ADP, one row per player entry.

    Reads ``public/data/ffc_adp_ppr_<season>.json`` for the requested
    season, or for every season from 2018 through 2026 when ``season`` is
    not given. Seasons whose file cannot be fetched or parsed are skipped
    silently, so coverage depends on what exists in the repo.

    Each row is the raw player record plus a ``season`` column. Per the
    upstream files the other columns are expected to be ``name``,
    ``position``, ``team``, ``adp``, ``high``, ``low``, ``stdev``,
    ``timesDrafted`` and ``bye``.
    """
    if season:
        wanted = [season]
    else:
        wanted = [2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026]

    collected: list[dict] = []
    for year in wanted:
        try:
            payload = fetch_json(f"public/data/ffc_adp_ppr_{year}.json")
        except Exception:
            # Best effort: a missing or unreadable season is simply omitted.
            continue
        else:
            collected.extend(
                {"season": year, **entry} for entry in payload.get("players") or []
            )
    return pd.DataFrame(collected)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Raw feature matrix + manual feature overrides used by the pipeline."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from ._fetch import fetch_json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_feature_matrix() -> dict[str, Any]:
    """Load ``public/data/feature-matrix.json`` and return it unmodified.

    This is the raw parsed file, intended for advanced inspection. Its
    ``careerPredictions2026`` entry is what
    :func:`~stathead.load_career_predictions_2026` turns into a DataFrame,
    which is usually the more convenient entry point.
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    return matrix
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load_manual_overrides() -> dict[str, Any]:
    """Load ``public/data/manual-cfbd-overrides.json`` and return it as parsed.

    Upstream describes the file as manual CFBD usage overrides keyed by
    ``"Name|POS"`` for players missing from CFBD's player-usage data; see
    ``scripts/backfill_cfbd_variants.py`` in the main repo.
    """
    overrides = fetch_json("public/data/manual-cfbd-overrides.json")
    return overrides
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""KeepTradeCut dynasty values — current snapshot + daily history."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from ._fetch import fetch_json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_ktc() -> pd.DataFrame:
    """Current KeepTradeCut dynasty values, one row per ranking record.

    Reads the 1QB rankings file; each record's ``value`` becomes
    ``value_1qb`` and its ``superflexValue`` becomes ``value_superflex``,
    so one fetch covers both formats.

    ``playerID``, ``playerName`` and ``position`` are required on every
    record (a missing one raises ``KeyError``). The remaining fields
    default to ``None``, except ``isRookie``, which defaults to ``False``.

    Columns: ``playerID``, ``name``, ``position``, ``positionRank``,
    ``team``, ``age``, ``value_1qb``, ``value_superflex``, ``isRookie``.
    """
    records = []
    for entry in fetch_json("public/data/ktc_rankings_1qb.json"):
        records.append({
            "playerID": entry["playerID"],
            "name": entry["playerName"],
            "position": entry["position"],
            "positionRank": entry.get("positionRank"),
            "team": entry.get("team"),
            "age": entry.get("age"),
            "value_1qb": entry.get("value"),
            "value_superflex": entry.get("superflexValue"),
            "isRookie": entry.get("isRookie", False),
        })
    return pd.DataFrame(records)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def load_ktc_history() -> pd.DataFrame:
    """KTC value history, one row per (playerID, date) in the 1QB series.

    Rows are driven by each player's ``oneQB.valueHistory`` points. The
    superflex value for the same date is attached when one exists and is
    ``None`` otherwise. Name, position and team are looked up in the
    current 1QB rankings file and are ``None`` for players not listed
    there.

    Columns: ``playerID``, ``name``, ``position``, ``team``, ``date``,
    ``value_1qb``, ``value_superflex``. ``date`` is converted with
    ``pd.to_datetime`` unless the result is empty.
    """
    rankings = fetch_json("public/data/ktc_rankings_1qb.json")
    lookup = {}
    for entry in rankings:
        lookup[entry["playerID"]] = {
            "name": entry["playerName"],
            "position": entry["position"],
            "team": entry.get("team"),
        }

    records: list[dict] = []
    for player in fetch_json("public/data/ktc_history.json"):
        player_id = player["playerID"]
        details = lookup.get(player_id, {})
        superflex_points = (player.get("superflex") or {}).get("valueHistory") or []
        superflex_by_date = {point["d"]: point["v"] for point in superflex_points}
        one_qb_points = (player.get("oneQB") or {}).get("valueHistory") or []
        records.extend(
            {
                "playerID": player_id,
                "name": details.get("name"),
                "position": details.get("position"),
                "team": details.get("team"),
                "date": point["d"],
                "value_1qb": point["v"],
                "value_superflex": superflex_by_date.get(point["d"]),
            }
            for point in one_qb_points
        )

    frame = pd.DataFrame(records)
    if frame.empty:
        # No "date" column exists to convert when there are no rows.
        return frame
    frame["date"] = pd.to_datetime(frame["date"])
    return frame
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Rookie career model predictions — 2026 class + historical backtest."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from ._fetch import fetch_json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _flatten(row: dict) -> dict:
    """Return a copy of ``row`` with its ``features`` sub-dict lifted to top level.

    The ``features`` key itself is dropped from the result. On a name
    clash the value from ``features`` wins. A missing, ``None`` or empty
    ``features`` entry leaves just the other keys. ``row`` is not modified.
    """
    features = row.get("features") or {}
    base = {key: value for key, value in row.items() if key != "features"}
    return {**base, **features}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_career_predictions_2026() -> pd.DataFrame:
    """2026 draft-class career predictions, one row per entry.

    Reads the ``careerPredictions2026`` list from
    ``public/data/feature-matrix.json`` and lifts each entry's
    ``features`` sub-dict into top-level columns. A missing or empty list
    yields an empty DataFrame.

    Per upstream, columns include ``name``, ``position``, ``adp``,
    ``team``, ``predictedCareerPPG``, ``percentile``, ``modelTier``,
    ``boomProb``, ``bustProb``, ``boomZ``, ``bustZ`` plus the model
    features (the list is not exhaustive).
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    flattened = []
    for entry in matrix.get("careerPredictions2026") or []:
        # Flatten a shallow copy so the parsed JSON object is left untouched.
        flattened.append(_flatten(dict(entry)))
    return pd.DataFrame(flattened)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def load_career_backtest() -> pd.DataFrame:
    """Historical rookie backtest rows with predicted and actual PPG.

    Reads ``public/data/model-cache-career-v72.json`` and concatenates the
    ``backtestRows`` of every model under ``rookieCareerModels``. A row
    without a ``position`` takes the key its model is stored under, and
    each row's ``features`` sub-dict is lifted into top-level columns.

    Per upstream, columns include ``name``, ``position``, ``draftSeason``,
    ``predictedPPG``, ``actualPPG``, ``percentile``, ``modelTier`` and
    ``combinedScore`` plus the training features.
    """
    cache = fetch_json("public/data/model-cache-career-v72.json")
    models = cache.get("rookieCareerModels") or {}
    flattened: list[dict] = []
    for position, model in models.items():
        for raw in model.get("backtestRows") or []:
            record = dict(raw)
            if "position" not in record:
                record["position"] = position
            flattened.append(_flatten(record))
    return pd.DataFrame(flattened)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""2026 draft prospect scouting grades (NFL.com / PFN composite)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from ._fetch import fetch_json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_prospect_grades(year: int = 2026) -> pd.DataFrame:
    """NFL draft prospect scouting grades for one draft class.

    Reads ``src/data/prospect-grades-<year>.json`` from the repo. Upstream
    notes that only ``year=2026`` is currently distributed, so other years
    are expected to fail at fetch time.

    Per upstream, columns are ``name``, ``pos``, ``school``, ``grade``,
    ``projRound``, ``projPick`` and ``tier``.
    """
    source = f"src/data/prospect-grades-{year}.json"
    return pd.DataFrame(fetch_json(source))
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Smoke tests — verifies every loader returns a non-empty DataFrame when
|
|
2
|
+
run against a local checkout (pointed at the current working tree via a
|
|
3
|
+
monkeypatched ``_fetch._fetch`` override).
|
|
4
|
+
|
|
5
|
+
These tests run offline by reading files directly out of the repo's
|
|
6
|
+
``public/data`` folder. This keeps CI fast and avoids hitting GitHub.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
REPO = Path(__file__).resolve().parents[2]
|
|
18
|
+
sys.path.insert(0, str(REPO / "python" / "src"))
|
|
19
|
+
|
|
20
|
+
import stathead # noqa: E402
|
|
21
|
+
from stathead import _fetch # noqa: E402
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture(autouse=True)
def local_fetch(monkeypatch):
    """Serve every fetch from the local checkout instead of the network.

    Replaces ``_fetch._fetch`` for the duration of each test with a reader
    that resolves the requested path against ``REPO``.
    """
    def _read_from_checkout(path: str) -> bytes:
        return (REPO / path).read_bytes()

    monkeypatch.setattr(_fetch, "_fetch", _read_from_checkout)
    yield
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_career_predictions_2026_has_rookies():
    """The 2026 predictions frame is non-empty and has the headline columns."""
    predictions = stathead.load_career_predictions_2026()
    required = {"name", "position", "predictedCareerPPG", "modelTier", "percentile"}
    assert not predictions.empty
    assert required.issubset(predictions.columns)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_career_backtest_has_history():
    """The backtest has 800+ rows, key columns, and only skill positions."""
    backtest = stathead.load_career_backtest()
    required = {"name", "position", "predictedPPG", "actualPPG", "modelTier"}
    skill_positions = ["QB", "RB", "WR", "TE"]
    assert len(backtest) > 800
    assert required.issubset(backtest.columns)
    assert backtest.position.isin(skill_positions).all()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_adp_historical_is_fully_populated():
    """Historical ADP has 4000+ rows and covers every season 2010-2025."""
    adp = stathead.load_adp_historical()
    assert len(adp) > 4000
    assert {"season", "name", "position", "adp"}.issubset(adp.columns)
    # Every season from 2010-2025 should be present.
    expected_seasons = set(range(2010, 2026))
    seasons_present = set(adp.season.unique())
    assert expected_seasons.issubset(seasons_present)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_ktc_current_values():
    """The current KTC frame is non-empty and exposes both value formats."""
    values = stathead.load_ktc()
    required = {"name", "value_1qb", "value_superflex", "isRookie"}
    assert not values.empty
    assert required.issubset(values.columns)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_ktc_history_is_long():
    """KTC history is non-empty, keyed by player and date, with real datetimes."""
    history = stathead.load_ktc_history()
    required = {"playerID", "date", "value_1qb"}
    assert not history.empty
    assert required.issubset(history.columns)
    assert pd.api.types.is_datetime64_any_dtype(history.date)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_prospect_grades_2026():
    """The 2026 prospect grades load with name, position and grade columns."""
    grades = stathead.load_prospect_grades(2026)
    required = {"name", "pos", "grade"}
    assert not grades.empty
    assert required.issubset(grades.columns)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_feature_matrix_is_dict():
    """The raw feature matrix is a dict containing the 2026 predictions."""
    matrix = stathead.load_feature_matrix()
    assert isinstance(matrix, dict)
    assert "careerPredictions2026" in matrix
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_manual_overrides_keyed_by_name_pos():
    """Overrides are a dict with at least one ``Name|POS``-style key."""
    overrides = stathead.load_manual_overrides()
    assert isinstance(overrides, dict)
    # Keys starting with "_" are ignored by this check.
    data_keys = [key for key in overrides if not key.startswith("_")]
    assert any("|" in key for key in data_keys)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_pin_version_changes_cache_root(tmp_path, monkeypatch):
    """Pinning a ref places cache files under ``<cache root>/<ref>/<path>``."""
    # With a custom cache root, the cache file should end up under <ref>/<path>.
    monkeypatch.setattr(_fetch, "_cache_root", lambda: tmp_path)
    # Register the current ref with monkeypatch so it is restored at teardown
    # even when an assertion below fails; a trailing manual reset would be
    # skipped on failure and would not restore a non-"main" starting ref.
    monkeypatch.setattr(_fetch, "_ref", _fetch.current_ref())
    stathead.pin_version("abc123")
    p = _fetch._cache_path("foo/bar.json")
    assert p.parent.name == "foo"
    assert p.parent.parent.name == "abc123"
|