stathead 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ node_modules
2
+ dist
3
+ .DS_Store
4
+ *.local
5
+ public/data/*
6
+ !public/data/training-rows-cache-*.json
7
+ !public/data/trained-models-cache-*.json
8
+ !public/data/model-cache-*.json
9
+ !public/data/feature-store/
10
+ !public/data/score-store/
11
+ !public/data/ktc_rankings_*.json
12
+ !public/data/ktc_history.json
13
+ !public/data/nflverse_weekly_2025.json
14
+ !public/data/ppg-feature-ablation.json
15
+ !public/data/ppg-hyperparam-sweep.json
16
+ !public/data/residual-feature-ablation.json
17
+ !public/data/residual-hyperparam-sweep.json
18
+ !public/data/ktc-feature-ablation.json
19
+ !public/data/ktc-hyperparam-sweep.json
20
+ !public/data/pdf-career-ablation-predraft.json
21
+ !public/data/pdf-career-ablation-postdraft.json
22
+ !public/data/rsp-career-ablation-predraft.json
23
+ !public/data/rsp-career-ablation-postdraft.json
24
+ !public/data/pdf-only-career-test.json
25
+ !public/data/qb-beast-ablation.json
26
+ !public/data/ktc-forecasts-*.json
27
+ !public/data/redraft-projections.json
28
+ !public/data/cfbd-college-stats.json
29
+ !public/data/cfbd-sp-ratings.json
30
+ !public/data/cfbd-recruiting.json
31
+ !public/data/cfbd-games.json
32
+ !public/data/cfbd-team-talent.json
33
+ !public/data/cfbd-player-usage.json
34
+ !public/data/cfbd/
35
+ !public/data/cfbd/*.json
36
+ !public/data/pdf-prospect-features.json
37
+ !public/data/pdf-prospect-features-merged.json
38
+ !public/data/rsp-historical-rankings.json
39
+ !public/data/rsp-qb-features.json
40
+ !public/data/manual-cfbd-overrides.json
41
+ # Raw upstream sources committed as .csv.gz (~35 MB vs ~200 MB raw).
42
+ # TS readLocalFile auto-decompresses; Python runs `bash scripts/extract-data.sh`
43
+ # once to materialize the .csv. See scripts/pull-all-data-sources.sh.
44
+ !public/data/*.csv.gz
45
+ !public/data/ffc_adp_ppr_*.json
46
+ !public/data/espn_adp_*.json
47
+ .claude/*
48
+ !.claude/commands/
49
+ !.claude/commands/**
50
+ CLAUDE.local.md
51
+ __pycache__/
52
+ .nflverse-cache/
53
+ .cache/
54
+ .venv/
55
+ pdfs/
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: stathead
3
+ Version: 0.1.0
4
+ Summary: Python client for the StatHead fantasy football model — rookie career predictions, historical ADP, dynasty values, and flattened feature matrices.
5
+ Project-URL: Homepage, https://github.com/dachhack/stathead
6
+ Project-URL: Source, https://github.com/dachhack/stathead
7
+ Project-URL: Issues, https://github.com/dachhack/stathead/issues
8
+ Author: StatHead
9
+ License: MIT
10
+ Keywords: adp,dynasty,fantasy-football,ktc,nfl,rookie-prospects
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
+ Requires-Python: >=3.9
18
+ Requires-Dist: pandas>=1.5
19
+ Provides-Extra: dev
20
+ Requires-Dist: duckdb>=0.10; extra == 'dev'
21
+ Requires-Dist: polars>=0.20; extra == 'dev'
22
+ Requires-Dist: pytest>=7; extra == 'dev'
23
+ Provides-Extra: duckdb
24
+ Requires-Dist: duckdb>=0.10; extra == 'duckdb'
25
+ Provides-Extra: polars
26
+ Requires-Dist: polars>=0.20; extra == 'polars'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # stathead
30
+
31
+ Python client for the [StatHead](https://github.com/dachhack/stathead) fantasy football model. Returns pandas DataFrames of rookie career predictions, historical ADP, KeepTradeCut dynasty values, and the flattened feature matrix used to train the models.
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install stathead
37
+ ```
38
+
39
+ Optional extras:
40
+
41
+ ```bash
42
+ pip install "stathead[polars]" # for .to_polars() helpers
43
+ pip install "stathead[duckdb]" # for local SQL querying
44
+ ```
45
+
46
+ ## Quick start
47
+
48
+ ```python
49
+ import stathead as sh
50
+
51
+ # 2026 rookie class predictions (77 players × ~80 columns)
52
+ rookies = sh.load_career_predictions_2026()
53
+ rookies.nlargest(10, "percentile")[["name", "position", "predictedCareerPPG", "modelTier"]]
54
+
55
+ # Historical backtest — predicted vs actual for every drafted rookie 2010-2025
56
+ backtest = sh.load_career_backtest()
57
+ wr = backtest[backtest.position == "WR"]
58
+ wr.groupby("modelTier")[["actualPPG", "predictedPPG"]].mean()
59
+
60
+ # Historical ADP, every season fully populated
61
+ adp = sh.load_adp_historical()
62
+ adp[(adp.season == 2023) & (adp.adp <= 24)]
63
+
64
+ # Dynasty values
65
+ ktc = sh.load_ktc()
66
+ ktc.nlargest(25, "value_1qb")
67
+
68
+ # Daily KTC history (for momentum / trend analysis)
69
+ hist = sh.load_ktc_history()
70
+ hist.pivot_table(index="date", columns="name", values="value_1qb")
71
+ ```
72
+
73
+ ## Pinning to a specific version
74
+
75
+ Loaders resolve against the upstream GitHub repo. Pin to a commit SHA, tag,
76
+ or branch for reproducibility:
77
+
78
+ ```python
79
+ sh.pin_version("a6720e5") # or a tagged release
80
+ ```
81
+
82
+ Clear the local cache if you want to re-fetch:
83
+
84
+ ```python
85
+ sh.clear_cache()
86
+ ```
87
+
88
+ ## Data freshness
89
+
90
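+ Data files are cached under `~/.cache/stathead/<ref>/` (or the `platformdirs` / `$XDG_CACHE_HOME` equivalent) after the first
91
+ download. Subsequent runs read from disk with no network round trip, so a moving ref such as `main` is never refreshed automatically. Delete the
92
+ cache directory or call `clear_cache()` to force a refresh.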
93
+
94
+ ## Available loaders
95
+
96
+ | Function | Returns | Shape |
97
+ |---|---|---|
98
+ | `load_career_predictions_2026()` | 2026 rookie predictions | ~77 × ~80 cols |
99
+ | `load_career_backtest()` | Historical rookies with pred + actual PPG | ~1087 × ~100 cols |
100
+ | `load_adp_historical()` | Model-training ADP 2010-2025 | 4507 × 10 |
101
+ | `load_adp_ffc(season=None)` | FFC PPR raw ADP (per season as fetched) | variable |
102
+ | `load_ktc()` | Current KTC dynasty values | ~500 × 9 |
103
+ | `load_ktc_history()` | Daily KTC history | ~100k × 7 |
104
+ | `load_prospect_grades(year=2026)` | Draft scouting grades | ~200 × 7 |
105
+ | `load_feature_matrix()` | Raw `feature-matrix.json` (dict) | — |
106
+ | `load_manual_overrides()` | Manual CFBD usage overrides (dict) | — |
107
+
108
+ ## Licensing & attribution
109
+
110
+ Package code is MIT-licensed. The data this package retrieves is derived
111
+ from the StatHead project's own modeling pipeline; upstream sources
112
+ (nflverse, FFC, KeepTradeCut, CFBD, etc.) retain their own terms — see
113
+ each source's license before redistributing. If you're building on these
114
+ predictions, a link back to the StatHead repo is appreciated but not
115
+ required.
116
+
117
+ ## Contributing
118
+
119
+ The package is small and focused — see
120
+ [`python/src/stathead/`](./src/stathead/) for the loader modules. Issues
121
+ and PRs welcome at the [main repo](https://github.com/dachhack/stathead).
@@ -0,0 +1,93 @@
1
+ # stathead
2
+
3
+ Python client for the [StatHead](https://github.com/dachhack/stathead) fantasy football model. Returns pandas DataFrames of rookie career predictions, historical ADP, KeepTradeCut dynasty values, and the flattened feature matrix used to train the models.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install stathead
9
+ ```
10
+
11
+ Optional extras:
12
+
13
+ ```bash
14
+ pip install "stathead[polars]" # for .to_polars() helpers
15
+ pip install "stathead[duckdb]" # for local SQL querying
16
+ ```
17
+
18
+ ## Quick start
19
+
20
+ ```python
21
+ import stathead as sh
22
+
23
+ # 2026 rookie class predictions (77 players × ~80 columns)
24
+ rookies = sh.load_career_predictions_2026()
25
+ rookies.nlargest(10, "percentile")[["name", "position", "predictedCareerPPG", "modelTier"]]
26
+
27
+ # Historical backtest — predicted vs actual for every drafted rookie 2010-2025
28
+ backtest = sh.load_career_backtest()
29
+ wr = backtest[backtest.position == "WR"]
30
+ wr.groupby("modelTier")[["actualPPG", "predictedPPG"]].mean()
31
+
32
+ # Historical ADP, every season fully populated
33
+ adp = sh.load_adp_historical()
34
+ adp[(adp.season == 2023) & (adp.adp <= 24)]
35
+
36
+ # Dynasty values
37
+ ktc = sh.load_ktc()
38
+ ktc.nlargest(25, "value_1qb")
39
+
40
+ # Daily KTC history (for momentum / trend analysis)
41
+ hist = sh.load_ktc_history()
42
+ hist.pivot_table(index="date", columns="name", values="value_1qb")
43
+ ```
44
+
45
+ ## Pinning to a specific version
46
+
47
+ Loaders resolve against the upstream GitHub repo. Pin to a commit SHA, tag,
48
+ or branch for reproducibility:
49
+
50
+ ```python
51
+ sh.pin_version("a6720e5") # or a tagged release
52
+ ```
53
+
54
+ Clear the local cache if you want to re-fetch:
55
+
56
+ ```python
57
+ sh.clear_cache()
58
+ ```
59
+
60
+ ## Data freshness
61
+
62
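+ Data files are cached under `~/.cache/stathead/<ref>/` (or the `platformdirs` / `$XDG_CACHE_HOME` equivalent) after the first
63
+ download. Subsequent runs read from disk with no network round trip, so a moving ref such as `main` is never refreshed automatically. Delete the
64
+ cache directory or call `clear_cache()` to force a refresh.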
65
+
66
+ ## Available loaders
67
+
68
+ | Function | Returns | Shape |
69
+ |---|---|---|
70
+ | `load_career_predictions_2026()` | 2026 rookie predictions | ~77 × ~80 cols |
71
+ | `load_career_backtest()` | Historical rookies with pred + actual PPG | ~1087 × ~100 cols |
72
+ | `load_adp_historical()` | Model-training ADP 2010-2025 | 4507 × 10 |
73
+ | `load_adp_ffc(season=None)` | FFC PPR raw ADP (per season as fetched) | variable |
74
+ | `load_ktc()` | Current KTC dynasty values | ~500 × 9 |
75
+ | `load_ktc_history()` | Daily KTC history | ~100k × 7 |
76
+ | `load_prospect_grades(year=2026)` | Draft scouting grades | ~200 × 7 |
77
+ | `load_feature_matrix()` | Raw `feature-matrix.json` (dict) | — |
78
+ | `load_manual_overrides()` | Manual CFBD usage overrides (dict) | — |
79
+
80
+ ## Licensing & attribution
81
+
82
+ Package code is MIT-licensed. The data this package retrieves is derived
83
+ from the StatHead project's own modeling pipeline; upstream sources
84
+ (nflverse, FFC, KeepTradeCut, CFBD, etc.) retain their own terms — see
85
+ each source's license before redistributing. If you're building on these
86
+ predictions, a link back to the StatHead repo is appreciated but not
87
+ required.
88
+
89
+ ## Contributing
90
+
91
+ The package is small and focused — see
92
+ [`python/src/stathead/`](./src/stathead/) for the loader modules. Issues
93
+ and PRs welcome at the [main repo](https://github.com/dachhack/stathead).
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "stathead"
7
+ version = "0.1.0"
8
+ description = "Python client for the StatHead fantasy football model — rookie career predictions, historical ADP, dynasty values, and flattened feature matrices."
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "StatHead" }]
13
+ keywords = ["fantasy-football", "nfl", "dynasty", "rookie-prospects", "adp", "ktc"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3 :: Only",
20
+ "Topic :: Scientific/Engineering :: Information Analysis",
21
+ ]
22
+ dependencies = [
23
+ "pandas>=1.5",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ polars = ["polars>=0.20"]
28
+ duckdb = ["duckdb>=0.10"]
29
+ dev = ["pytest>=7", "polars>=0.20", "duckdb>=0.10"]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/dachhack/stathead"
33
+ Source = "https://github.com/dachhack/stathead"
34
+ Issues = "https://github.com/dachhack/stathead/issues"
35
+
36
+ [tool.hatch.build.targets.wheel]
37
+ packages = ["src/stathead"]
38
+
39
+ [tool.pytest.ini_options]
40
+ testpaths = ["tests"]
@@ -0,0 +1,43 @@
1
+ """StatHead — Python client for the fantasy football model.
2
+
3
+ Loaders return pandas DataFrames sourced from the upstream repo at
4
+ https://github.com/dachhack/stathead. First call downloads + caches the
5
+ underlying JSON/CSV; subsequent calls read from the cache.
6
+
7
+ Quick start:
8
+
9
+ import stathead as sh
10
+
11
+ rookies = sh.load_career_predictions_2026()
12
+ backtest = sh.load_career_backtest()
13
+ adp = sh.load_adp_historical()
14
+ ktc = sh.load_ktc()
15
+
16
+ Pin to a specific commit for reproducibility:
17
+
18
+ sh.pin_version("a6720e5") # or any git ref/tag/SHA
19
+ """
20
+ from ._fetch import clear_cache, pin_version, set_ref
21
+ from .adp import load_adp_ffc, load_adp_historical
22
+ from .features import load_feature_matrix, load_manual_overrides
23
+ from .ktc import load_ktc, load_ktc_history
24
+ from .predictions import load_career_backtest, load_career_predictions_2026
25
+ from .prospects import load_prospect_grades
26
+
27
# Package version string; pyproject.toml declares the same value, so the two
# must be bumped together.
__version__ = "0.1.0"

# Names exported by ``from stathead import *``: the ref/cache controls from
# ``_fetch`` followed by every loader re-exported above.
__all__ = [
    "__version__",
    "clear_cache",
    "pin_version",
    "set_ref",
    "load_adp_ffc",
    "load_adp_historical",
    "load_career_backtest",
    "load_career_predictions_2026",
    "load_feature_matrix",
    "load_ktc",
    "load_ktc_history",
    "load_manual_overrides",
    "load_prospect_grades",
]
@@ -0,0 +1,89 @@
1
+ """Fetch + on-disk cache for files hosted in the stathead repo.
2
+
3
+ All loaders go through :func:`fetch_json` / :func:`fetch_csv_gz`, which
4
+ resolve the URL against the currently-pinned git ref and cache the raw
5
+ bytes under ``<cache root>/<ref>/<path>`` (``~/.cache/stathead`` unless ``platformdirs`` or ``XDG_CACHE_HOME`` selects another root).
6
+
7
+ Override the ref via :func:`pin_version` (per-session) or the
8
+ ``STATHEAD_REF`` env var (per-shell).
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import gzip
13
+ import json
14
+ import os
15
+ import shutil
16
+ from pathlib import Path
17
+ from typing import Any
18
+ from urllib.request import Request, urlopen
19
+
20
# Prefix of every download URL; fetches request ``<base>/<ref>/<path>``.
_BASE_URL = "https://raw.githubusercontent.com/dachhack/stathead"
# Ref used when the STATHEAD_REF environment variable is not set.
_DEFAULT_REF = "main"
# Active git ref. The environment is read once, when this module is imported;
# afterwards only set_ref() / pin_version() change it.
_ref: str = os.environ.get("STATHEAD_REF", _DEFAULT_REF)
23
+
24
+
25
def pin_version(ref: str) -> None:
    """Resolve every later fetch against ``ref``.

    ``ref`` may be a commit SHA, a tag, or a branch name; pinning ``"main"``
    switches back to the latest upstream data. Delegates to :func:`set_ref`.
    """
    return set_ref(ref)
29
+
30
+
31
def set_ref(ref: str) -> None:
    """Replace the module-wide git ref used to build fetch URLs and cache paths.

    :func:`pin_version` forwards here, so the two names are interchangeable.
    """
    global _ref
    _ref = ref
35
+
36
+
37
def current_ref() -> str:
    """Report which git ref fetches are resolved against right now."""
    active = _ref
    return active
40
+
41
+
42
+ def _cache_root() -> Path:
43
+ try:
44
+ from platformdirs import user_cache_dir # type: ignore
45
+
46
+ return Path(user_cache_dir("stathead"))
47
+ except ImportError:
48
+ root = os.environ.get("XDG_CACHE_HOME")
49
+ if root:
50
+ return Path(root) / "stathead"
51
+ return Path.home() / ".cache" / "stathead"
52
+
53
+
54
def _cache_path(path: str) -> Path:
    """Map a repo-relative ``path`` to its on-disk cache file for the active ref."""
    ref_dir = _cache_root() / _ref
    return ref_dir / path
56
+
57
+
58
def clear_cache(ref: str | None = None) -> None:
    """Delete cached files for one ref, or for every ref.

    Args:
        ref: Git ref whose cache directory should be removed. ``None`` (or an
            empty string) selects the currently active ref; ``"*"`` removes
            the entire cache root.

    Raises:
        ValueError: If the ref does not resolve to a directory strictly
            inside the cache root (for example ``".."``, ``"."`` or an
            absolute path). Such a value would otherwise be passed straight
            to :func:`shutil.rmtree`.

    A cache directory that does not exist is not an error; the call is then a
    no-op.
    """
    cache_root = _cache_root()
    if ref == "*":
        target = cache_root
    else:
        name = ref or _ref
        target = cache_root / name
        # Compare fully resolved paths so ".." segments, absolute refs and
        # symlinks cannot move the deletion target out of the cache.
        if cache_root.resolve() not in target.resolve().parents:
            raise ValueError(
                f"ref {name!r} does not name a directory inside the cache root"
            )
    if target.exists():
        shutil.rmtree(target)
67
+
68
+
69
def _fetch(path: str) -> bytes:
    """Return the raw bytes of the repo file ``path`` at the active ref.

    A cache hit is returned without touching the network. On a miss the file
    is requested from ``<_BASE_URL>/<ref>/<path>`` with a 60-second timeout
    and stored in the cache for later calls.

    Args:
        path: Repo-relative file path, e.g. ``"public/data/ktc_history.json"``.

    Returns:
        The file contents exactly as served (no decoding or decompression).

    Raises:
        Whatever ``urlopen`` raises for a failed request; nothing is written
        to the cache in that case.
    """
    cache = _cache_path(path)
    if cache.exists():
        return cache.read_bytes()
    url = f"{_BASE_URL}/{_ref}/{path}"
    req = Request(url, headers={"User-Agent": "stathead-py"})
    with urlopen(req, timeout=60) as resp:  # noqa: S310 — trusted domain
        data = resp.read()
    cache.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it into place. A write that is
    # interrupted half-way must never leave a truncated file behind, because
    # the existence check above would serve it as a valid hit forever.
    tmp = cache.with_name(f"{cache.name}.{os.getpid()}.tmp")
    try:
        tmp.write_bytes(data)
        os.replace(tmp, cache)
    finally:
        # No-op after a successful rename; removes the partial file otherwise.
        tmp.unlink(missing_ok=True)
    return data
80
+
81
+
82
def fetch_json(path: str) -> Any:
    """Fetch the repo file at ``path`` and return it parsed as UTF-8 JSON."""
    payload = _fetch(path)
    text = payload.decode("utf-8")
    return json.loads(text)
85
+
86
+
87
def fetch_csv_gz(path: str) -> bytes:
    """Fetch a gzip-compressed file and return its decompressed bytes."""
    compressed = _fetch(path)
    return gzip.decompress(compressed)
@@ -0,0 +1,67 @@
1
+ """ADP loaders — historical (all seasons, model training data) + FFC raw."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_adp_historical() -> pd.DataFrame:
    """Build the historical ADP table from the feature-store shards.

    Reads ``feature-store/profile.json`` and ``feature-store/players.json``.
    Profile keys have the form ``"<name_norm>::<season>"``; keys without
    ``"::"`` or with a non-integer season are skipped. Display name and
    position come from the players shard under the same key, falling back to
    ``name_norm`` and ``None`` when the player is absent there.

    Returns:
        A DataFrame sorted by ``season`` then ``adp`` (missing ADP last) with
        a fresh 0..n-1 index and the columns ``season``, ``name``,
        ``name_norm``, ``position``, ``adp``, ``adpRound``, ``nflDraftPick``,
        ``nflDraftRound``, ``age``, ``yearsInLeague``. The columns are present
        even when no profile entry qualifies.
    """
    columns = [
        "season",
        "name",
        "name_norm",
        "position",
        "adp",
        "adpRound",
        "nflDraftPick",
        "nflDraftRound",
        "age",
        "yearsInLeague",
    ]
    profile = fetch_json("public/data/feature-store/profile.json")
    players = fetch_json("public/data/feature-store/players.json")
    rows: list[dict] = []
    for key, rec in profile.items():
        if "::" not in key:
            continue
        name_norm, season_str = key.rsplit("::", 1)
        try:
            season = int(season_str)
        except ValueError:
            continue
        info = players.get(key) or {}
        rows.append({
            "season": season,
            "name": info.get("displayName", name_norm),
            "name_norm": name_norm,
            "position": info.get("position"),
            "adp": rec.get("adp"),
            "adpRound": rec.get("adpRound"),
            "nflDraftPick": rec.get("nflDraftPick"),
            "nflDraftRound": rec.get("nflDraftRound"),
            "age": rec.get("age"),
            "yearsInLeague": rec.get("yearsInLeague"),
        })
    # Passing the column list keeps the sort below valid when ``rows`` is
    # empty; a column-less frame would raise KeyError on "season".
    frame = pd.DataFrame(rows, columns=columns)
    return frame.sort_values(["season", "adp"], na_position="last").reset_index(drop=True)
47
+
48
+
49
def load_adp_ffc(season: int | None = None) -> pd.DataFrame:
    """Load FantasyFootballCalculator PPR ADP rows as stored in the repo.

    Args:
        season: Season to load. ``None`` probes every season from 2018
            through 2026 and concatenates whatever is available.

    Returns:
        One row per entry of each file's ``"players"`` list, with a ``season``
        column added in front of the player's own fields. Seasons whose file
        cannot be fetched or parsed are skipped, so the result is an empty
        DataFrame when nothing is available.
    """
    # ``is not None`` rather than truthiness, so an explicit season is never
    # mistaken for "load everything".
    if season is not None:
        seasons = [season]
    else:
        seasons = [2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026]
    rows: list[dict] = []
    for s in seasons:
        try:
            d = fetch_json(f"public/data/ffc_adp_ppr_{s}.json")
        except (OSError, ValueError):
            # Best effort: most probed seasons have no file upstream. OSError
            # covers HTTP/URL errors and missing local files; ValueError
            # covers undecodable or malformed JSON.
            continue
        rows.extend({"season": s, **p} for p in d.get("players") or [])
    return pd.DataFrame(rows)
@@ -0,0 +1,27 @@
1
+ """Raw feature matrix + manual feature overrides used by the pipeline."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_feature_matrix() -> dict[str, Any]:
    """Load ``public/data/feature-matrix.json`` and return it unmodified.

    The ``careerPredictions2026`` entry of this structure is what
    :func:`~stathead.load_career_predictions_2026` turns into a DataFrame;
    call this function only when you need the raw, unflattened data.
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    return matrix
18
+
19
+
20
def load_manual_overrides() -> dict[str, Any]:
    """Load ``public/data/manual-cfbd-overrides.json`` and return it unmodified.

    Player entries are keyed by ``"Name|POS"``. The upstream docs describe
    the values as hand-entered college usage numbers for players missing from
    the CFBD player-usage data; see ``scripts/backfill_cfbd_variants.py`` in
    the main repo.
    """
    overrides = fetch_json("public/data/manual-cfbd-overrides.json")
    return overrides
@@ -0,0 +1,66 @@
1
+ """KeepTradeCut dynasty values — current snapshot + daily history."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_ktc() -> pd.DataFrame:
    """Return the current KeepTradeCut snapshot, one row per ranking record.

    Everything comes from ``ktc_rankings_1qb.json``: each record's ``value``
    becomes ``value_1qb`` and its ``superflexValue`` becomes
    ``value_superflex``, so one download covers both formats.

    ``playerID``, ``playerName`` and ``position`` must be present on every
    record (``KeyError`` otherwise); the remaining fields default to ``None``,
    or ``False`` for ``isRookie``.

    Columns: ``playerID``, ``name``, ``position``, ``positionRank``, ``team``,
    ``age``, ``value_1qb``, ``value_superflex``, ``isRookie``.
    """
    rankings = fetch_json("public/data/ktc_rankings_1qb.json")
    table: list[dict] = []
    for entry in rankings:
        table.append({
            "playerID": entry["playerID"],
            "name": entry["playerName"],
            "position": entry["position"],
            "positionRank": entry.get("positionRank"),
            "team": entry.get("team"),
            "age": entry.get("age"),
            "value_1qb": entry.get("value"),
            "value_superflex": entry.get("superflexValue"),
            "isRookie": entry.get("isRookie", False),
        })
    return pd.DataFrame(table)
32
+
33
+
34
def load_ktc_history() -> pd.DataFrame:
    """Return KTC value history in long form, one row per 1QB history point.

    Name, position and team are looked up by ``playerID`` in the current
    rankings file and are ``None`` for players that no longer appear there.
    The Superflex value recorded for the same date is placed beside each 1QB
    value, or ``None`` when there is no Superflex point for that date.

    Columns: ``playerID``, ``name``, ``position``, ``team``, ``date``
    (converted with ``pd.to_datetime``), ``value_1qb``, ``value_superflex``.
    An empty history yields an empty, column-less DataFrame.
    """
    snapshot = fetch_json("public/data/ktc_rankings_1qb.json")
    meta_by_id: dict = {}
    for rec in snapshot:
        player_id = rec["playerID"]
        meta_by_id[player_id] = {
            "name": rec["playerName"],
            "position": rec["position"],
            "team": rec.get("team"),
        }

    timeline = fetch_json("public/data/ktc_history.json")
    records: list[dict] = []
    for player in timeline:
        meta = meta_by_id.get(player["playerID"], {})
        superflex_points = (player.get("superflex") or {}).get("valueHistory") or []
        superflex_by_date = {pt["d"]: pt["v"] for pt in superflex_points}
        one_qb_points = (player.get("oneQB") or {}).get("valueHistory") or []
        for pt in one_qb_points:
            records.append({
                "playerID": player["playerID"],
                "name": meta.get("name"),
                "position": meta.get("position"),
                "team": meta.get("team"),
                "date": pt["d"],
                "value_1qb": pt["v"],
                "value_superflex": superflex_by_date.get(pt["d"]),
            })

    frame = pd.DataFrame(records)
    if frame.empty:
        return frame
    frame["date"] = pd.to_datetime(frame["date"])
    return frame
@@ -0,0 +1,46 @@
1
+ """Rookie career model predictions — 2026 class + historical backtest."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
+ def _flatten(row: dict) -> dict:
10
+ """Lift ``features`` sub-dict into top-level columns."""
11
+ features = row.pop("features", None) or {}
12
+ return {**row, **features}
13
+
14
+
15
def load_career_predictions_2026() -> pd.DataFrame:
    """Return the 2026 draft-class career predictions, one row per prospect.

    Reads the ``careerPredictions2026`` list from ``feature-matrix.json`` and
    flattens every entry, so each key of an entry's ``features`` sub-dict
    becomes its own column next to the top-level fields (``name``,
    ``position``, ``predictedCareerPPG``, ``percentile``, ``modelTier``, ...).
    A missing or empty list yields an empty DataFrame.
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    prospects = matrix.get("careerPredictions2026") or []
    flattened: list[dict] = []
    for prospect in prospects:
        # Copy before flattening so the parsed JSON structure is not altered.
        flattened.append(_flatten(dict(prospect)))
    return pd.DataFrame(flattened)
27
+
28
+
29
def load_career_backtest() -> pd.DataFrame:
    """Return the rookie career-model backtest rows for all positions.

    Reads ``rookieCareerModels`` from ``model-cache-career-v72.json``, a
    mapping from position to a model entry, and concatenates every entry's
    ``backtestRows``. A row without its own ``position`` takes the mapping
    key, and each row's ``features`` sub-dict is lifted into top-level
    columns.

    Expected columns include ``name``, ``position``, ``predictedPPG``,
    ``actualPPG`` and ``modelTier``, plus one column per feature.
    """
    payload = fetch_json("public/data/model-cache-career-v72.json")
    models_by_position = payload.get("rookieCareerModels") or {}
    flattened: list[dict] = []
    for position, model in models_by_position.items():
        for source_row in model.get("backtestRows") or []:
            record = dict(source_row)
            if "position" not in record:
                record["position"] = position
            flattened.append(_flatten(record))
    return pd.DataFrame(flattened)
@@ -0,0 +1,18 @@
1
+ """2026 draft prospect scouting grades (NFL.com / PFN composite)."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_prospect_grades(year: int = 2026) -> pd.DataFrame:
    """Return the draft prospect scouting grades for the ``year`` class.

    Loads ``src/data/prospect-grades-<year>.json`` from the repo and passes
    the parsed content directly to ``pd.DataFrame``. According to the package
    documentation only the 2026 file is currently published; other years
    fail at fetch time.

    Expected columns include ``name``, ``pos`` and ``grade``.
    """
    source = f"src/data/prospect-grades-{year}.json"
    grades = fetch_json(source)
    return pd.DataFrame(grades)
@@ -0,0 +1,95 @@
1
+ """Smoke tests — verifies every loader returns a non-empty DataFrame when
2
+ run against a local checkout (the package's ``_fetch`` function is
3
+ monkeypatched to read files from the working tree).
4
+
5
+ These tests run offline by reading files directly out of the repo's
6
+ ``public/data`` folder. This keeps CI fast and avoids hitting GitHub.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ import pandas as pd
15
+ import pytest
16
+
17
+ REPO = Path(__file__).resolve().parents[2]
18
+ sys.path.insert(0, str(REPO / "python" / "src"))
19
+
20
+ import stathead # noqa: E402
21
+ from stathead import _fetch # noqa: E402
22
+
23
+
24
@pytest.fixture(autouse=True)
def local_fetch(monkeypatch):
    """Serve every fetch from the checkout at ``REPO`` instead of the network.

    Applied automatically to each test; ``monkeypatch`` restores the real
    fetcher afterwards.
    """
    def read_from_checkout(path: str) -> bytes:
        return (REPO / path).read_bytes()

    monkeypatch.setattr(_fetch, "_fetch", read_from_checkout)
    yield
33
+
34
+
35
def test_career_predictions_2026_has_rookies():
    """The 2026 predictions frame is non-empty and has the headline columns."""
    predictions = stathead.load_career_predictions_2026()
    required = {"name", "position", "predictedCareerPPG", "modelTier", "percentile"}
    assert not predictions.empty
    assert required.issubset(predictions.columns)
39
+
40
+
41
def test_career_backtest_has_history():
    """The backtest has 800+ rows, core columns, and only skill positions."""
    backtest = stathead.load_career_backtest()
    required = {"name", "position", "predictedPPG", "actualPPG", "modelTier"}
    assert len(backtest) > 800
    assert required.issubset(backtest.columns)
    assert backtest.position.isin(["QB", "RB", "WR", "TE"]).all()
46
+
47
+
48
def test_adp_historical_is_fully_populated():
    """Historical ADP has 4000+ rows and covers every season 2010-2025."""
    adp = stathead.load_adp_historical()
    assert len(adp) > 4000
    assert {"season", "name", "position", "adp"}.issubset(adp.columns)
    # No season in the 2010-2025 window may be missing.
    seasons_present = set(adp.season.unique())
    assert set(range(2010, 2026)).issubset(seasons_present)
54
+
55
+
56
def test_ktc_current_values():
    """The KTC snapshot is non-empty and exposes both value formats."""
    snapshot = stathead.load_ktc()
    required = {"name", "value_1qb", "value_superflex", "isRookie"}
    assert not snapshot.empty
    assert required.issubset(snapshot.columns)
60
+
61
+
62
def test_ktc_history_is_long():
    """KTC history is non-empty and its ``date`` column is a datetime dtype."""
    history = stathead.load_ktc_history()
    assert not history.empty
    assert {"playerID", "date", "value_1qb"}.issubset(history.columns)
    assert pd.api.types.is_datetime64_any_dtype(history.date)
67
+
68
+
69
def test_prospect_grades_2026():
    """The 2026 prospect grades load with name, position and grade columns."""
    grades = stathead.load_prospect_grades(2026)
    assert not grades.empty
    assert {"name", "pos", "grade"}.issubset(grades.columns)
73
+
74
+
75
def test_feature_matrix_is_dict():
    """The raw feature matrix is a dict that contains the 2026 predictions."""
    matrix = stathead.load_feature_matrix()
    assert isinstance(matrix, dict)
    assert "careerPredictions2026" in matrix
79
+
80
+
81
def test_manual_overrides_keyed_by_name_pos():
    """At least one non-underscore override key uses the ``Name|POS`` form."""
    overrides = stathead.load_manual_overrides()
    assert isinstance(overrides, dict)
    # Keys starting with "_" are ignored by this check.
    player_keys = [key for key in overrides if not key.startswith("_")]
    assert any("|" in key for key in player_keys)
86
+
87
+
88
def test_pin_version_changes_cache_root(tmp_path, monkeypatch):
    """A pinned ref becomes the directory level directly under the cache root."""
    # With a custom cache root, the cache file should end up under <ref>/<path>.
    monkeypatch.setattr(_fetch, "_cache_root", lambda: tmp_path)
    previous_ref = _fetch.current_ref()
    _fetch.set_ref("abc123")
    try:
        p = _fetch._cache_path("foo/bar.json")
        assert p.parent.name == "foo"
        assert p.parent.parent.name == "abc123"
    finally:
        # Restore whatever ref was active (it may come from STATHEAD_REF),
        # even when an assertion fails, so later tests see unchanged state.
        _fetch.set_ref(previous_ref)