stathead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
stathead/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ """StatHead — Python client for the fantasy football model.
2
+
3
+ Loaders return pandas DataFrames sourced from the upstream repo at
4
+ https://github.com/dachhack/stathead. First call downloads + caches the
5
+ underlying JSON/CSV; subsequent calls read from the cache.
6
+
7
+ Quick start:
8
+
9
+ import stathead as sh
10
+
11
+ rookies = sh.load_career_predictions_2026()
12
+ backtest = sh.load_career_backtest()
13
+ adp = sh.load_adp_historical()
14
+ ktc = sh.load_ktc()
15
+
16
+ Pin to a specific commit for reproducibility:
17
+
18
+ sh.pin_version("a6720e5") # or any git ref/tag/SHA
19
+ """
20
+ from ._fetch import clear_cache, pin_version, set_ref
21
+ from .adp import load_adp_ffc, load_adp_historical
22
+ from .features import load_feature_matrix, load_manual_overrides
23
+ from .ktc import load_ktc, load_ktc_history
24
+ from .predictions import load_career_backtest, load_career_predictions_2026
25
+ from .prospects import load_prospect_grades
26
+
27
__version__ = "0.1.0"

# Public API of the package: the names exported by ``from stathead import *``.
# Everything except ``__version__`` is imported from a submodule at the top of
# this file.
__all__ = [
    "__version__",
    # git-ref pinning and cache management
    "clear_cache",
    "pin_version",
    "set_ref",
    # data loaders
    "load_adp_ffc",
    "load_adp_historical",
    "load_career_backtest",
    "load_career_predictions_2026",
    "load_feature_matrix",
    "load_ktc",
    "load_ktc_history",
    "load_manual_overrides",
    "load_prospect_grades",
]
stathead/_fetch.py ADDED
@@ -0,0 +1,89 @@
1
+ """Fetch + on-disk cache for files hosted in the stathead repo.
2
+
3
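+ All loaders go through :func:`fetch_json` / :func:`fetch_csv_gz`, which
4
+ resolve the URL against the currently-pinned git ref and cache the raw
5
+ bytes under ``<cache root>/<ref>/<path>`` (the cache root is typically ``~/.cache/stathead``; ``platformdirs`` or ``XDG_CACHE_HOME`` can relocate it).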
6
+
7
+ Override the ref via :func:`pin_version` (per-session) or the
8
+ ``STATHEAD_REF`` env var (per-shell).
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import gzip
13
+ import json
14
+ import os
15
+ import shutil
16
+ from pathlib import Path
17
+ from typing import Any
18
+ from urllib.request import Request, urlopen
19
+
20
# Raw-content endpoint of the upstream repo; download URLs are built as
# ``<_BASE_URL>/<ref>/<path>``.
_BASE_URL = "https://raw.githubusercontent.com/dachhack/stathead"

# Ref used when nothing else has been pinned.
_DEFAULT_REF = "main"

# Currently active git ref. An unset, empty, or whitespace-only STATHEAD_REF
# falls back to the default: an empty ref would yield a malformed URL and
# would collapse the per-ref cache directory into the cache root.
_ref: str = (os.environ.get("STATHEAD_REF") or "").strip() or _DEFAULT_REF
23
+
24
+
25
def pin_version(ref: str) -> None:
    """Make every subsequent loader call fetch from the git ref ``ref``.

    ``ref`` may be a commit SHA, a tag, or a branch name; pass ``"main"``
    to go back to the latest data. The work is delegated to
    :func:`set_ref`.
    """
    return set_ref(ref)
29
+
30
+
31
def set_ref(ref: str) -> None:
    """Set the git ref (branch, tag, or commit SHA) used by later fetches.

    :func:`pin_version` forwards to this function.

    Args:
        ref: Git ref to fetch from. Surrounding whitespace is ignored.

    Raises:
        TypeError: If ``ref`` is not a string.
        ValueError: If ``ref`` is empty or whitespace-only. The ref is used
            both as a URL segment and as a cache sub-directory name, so an
            empty value would make the per-ref cache directory coincide
            with the cache root.
    """
    global _ref
    if not isinstance(ref, str):
        raise TypeError(f"ref must be a str, not {type(ref).__name__}")
    cleaned = ref.strip()
    if not cleaned:
        raise ValueError("ref must be a non-empty git ref (branch, tag, or commit SHA)")
    _ref = cleaned
35
+
36
+
37
def current_ref() -> str:
    """Report which git ref fetches are resolved against right now."""
    active = _ref
    return active
40
+
41
+
42
+ def _cache_root() -> Path:
43
+ try:
44
+ from platformdirs import user_cache_dir # type: ignore
45
+
46
+ return Path(user_cache_dir("stathead"))
47
+ except ImportError:
48
+ root = os.environ.get("XDG_CACHE_HOME")
49
+ if root:
50
+ return Path(root) / "stathead"
51
+ return Path.home() / ".cache" / "stathead"
52
+
53
+
54
def _cache_path(path: str) -> Path:
    """Map a repo-relative ``path`` to its cache file under the active ref."""
    ref_dir = _cache_root() / _ref
    return ref_dir / path
56
+
57
+
58
def clear_cache(ref: str | None = None) -> None:
    """Delete cached files for one ref, or for every ref.

    Args:
        ref: Ref whose cache directory should be removed. ``None`` (or an
            empty string) means the currently active ref; ``"*"`` removes
            the entire cache root.

    Raises:
        ValueError: If ``ref`` would resolve to the cache root itself or to
            a location outside it (for example ``".."``, ``"."`` or an
            absolute path). The target is removed recursively, so such
            values are refused rather than deleting unrelated directories.
    """
    root = _cache_root()
    if ref == "*":
        target = root
    else:
        name = ref or _ref
        base = os.path.normpath(root)
        # Lexical normalisation collapses ".." segments; joining with an
        # absolute name discards ``base`` altogether. Both cases fail the
        # prefix check below.
        candidate = os.path.normpath(os.path.join(base, name))
        if not candidate.startswith(base + os.sep):
            raise ValueError(
                f"refusing to clear {name!r}: it does not name a directory inside the cache"
            )
        target = Path(candidate)
    if target.exists():
        shutil.rmtree(target)
67
+
68
+
69
def _fetch(path: str) -> bytes:
    """Return the raw bytes of the repo file at ``path`` for the active ref.

    A cached copy is returned when one exists. Otherwise the file is
    downloaded from ``<_BASE_URL>/<ref>/<path>`` and stored in the cache
    before being returned.

    Args:
        path: Repo-relative path of the file.

    Returns:
        The file content as bytes.

    Raises:
        urllib.error.URLError: If the download fails (includes HTTP errors).
        OSError: If the cache file cannot be read or written.
    """
    cache = _cache_path(path)
    if cache.exists():
        return cache.read_bytes()
    url = f"{_BASE_URL}/{_ref}/{path}"
    req = Request(url, headers={"User-Agent": "stathead-py"})
    with urlopen(req, timeout=60) as resp:  # noqa: S310 — trusted domain
        data = resp.read()
    cache.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and rename it into place, so an
    # interrupted write never leaves a truncated file that later calls would
    # serve as a valid cache entry.
    tmp = cache.with_name(f"{cache.name}.{os.getpid()}.tmp")
    try:
        tmp.write_bytes(data)
        os.replace(tmp, cache)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        tmp.unlink(missing_ok=True)
    return data
80
+
81
+
82
def fetch_json(path: str) -> Any:
    """Load the repo file at ``path`` and parse it as UTF-8 encoded JSON."""
    text = _fetch(path).decode("utf-8")
    return json.loads(text)
85
+
86
+
87
def fetch_csv_gz(path: str) -> bytes:
    """Load the gzip-compressed repo file at ``path`` and return its
    decompressed bytes."""
    compressed = _fetch(path)
    return gzip.decompress(compressed)
stathead/adp.py ADDED
@@ -0,0 +1,67 @@
1
+ """ADP loaders — historical (all seasons, model training data) + FFC raw."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_adp_historical() -> pd.DataFrame:
    """Historical ADP from the feature-store shards, one row per player-season.

    Reads ``feature-store/profile.json`` and ``feature-store/players.json``.
    Profile keys have the form ``"<name_norm>::<season>"``; keys without
    ``"::"`` or with a non-integer season are skipped. Display name and
    position come from the players shard under the same key, with the
    normalized name used when no display name is present.

    Columns: ``season``, ``name``, ``name_norm``, ``position``, ``adp``,
    ``adpRound``, ``nflDraftPick``, ``nflDraftRound``, ``age``,
    ``yearsInLeague``.

    Returns:
        A DataFrame sorted by ``season`` then ``adp`` (missing ADP last)
        with a fresh integer index. The columns above are always present,
        even when no rows could be built.
    """
    columns = [
        "season",
        "name",
        "name_norm",
        "position",
        "adp",
        "adpRound",
        "nflDraftPick",
        "nflDraftRound",
        "age",
        "yearsInLeague",
    ]
    profile = fetch_json("public/data/feature-store/profile.json")
    players = fetch_json("public/data/feature-store/players.json")
    rows: list[dict] = []
    for key, rec in profile.items():
        if "::" not in key:
            continue
        name_norm, season_str = key.rsplit("::", 1)
        try:
            season = int(season_str)
        except ValueError:
            continue
        rec = rec or {}  # tolerate a null profile entry
        info = players.get(key) or {}
        rows.append({
            "season": season,
            "name": info.get("displayName", name_norm),
            "name_norm": name_norm,
            "position": info.get("position"),
            "adp": rec.get("adp"),
            "adpRound": rec.get("adpRound"),
            "nflDraftPick": rec.get("nflDraftPick"),
            "nflDraftRound": rec.get("nflDraftRound"),
            "age": rec.get("age"),
            "yearsInLeague": rec.get("yearsInLeague"),
        })
    # Explicit columns keep the sort below from raising KeyError when ``rows``
    # is empty (a column-less frame has no "season" to sort on).
    frame = pd.DataFrame(rows, columns=columns)
    return frame.sort_values(["season", "adp"], na_position="last").reset_index(drop=True)
47
+
48
+
49
def load_adp_ffc(season: int | None = None) -> pd.DataFrame:
    """FantasyFootballCalculator PPR ADP, one row per player per season.

    Each season is read from ``public/data/ffc_adp_ppr_<season>.json``; the
    entries of its ``players`` list become rows with a ``season`` column
    added. Loading is best-effort: a season whose file cannot be fetched or
    parsed is skipped, so the result may be an empty, column-less frame.

    Args:
        season: Season to load. ``None`` tries every season from 2018
            through 2026.

    Returns:
        A DataFrame of the player records that could be loaded.
    """
    # Local import: only the exception type is needed, and only here.
    from http.client import HTTPException

    # ``is not None`` rather than truthiness, so an explicit falsy season is
    # treated as a request for that season and not as "load everything".
    seasons = [season] if season is not None else [2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026]
    rows: list[dict] = []
    for s in seasons:
        try:
            d = fetch_json(f"public/data/ffc_adp_ppr_{s}.json")
        except (OSError, HTTPException, ValueError):
            # OSError: URL/HTTP errors, timeouts, cache I/O failures.
            # HTTPException: protocol-level failures while reading the body.
            # ValueError: invalid JSON or non-UTF-8 content.
            continue
        rows.extend({"season": s, **p} for p in d.get("players") or [])
    return pd.DataFrame(rows)
stathead/features.py ADDED
@@ -0,0 +1,27 @@
1
+ """Raw feature matrix + manual feature overrides used by the pipeline."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_feature_matrix() -> dict[str, Any]:
    """Load ``public/data/feature-matrix.json`` and return it unmodified.

    This is the raw parsed JSON, including the ``careerPredictions2026``
    entry that :func:`~stathead.load_career_predictions_2026` turns into a
    DataFrame. Prefer that loader unless the other sub-structures of the
    file are needed.
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    return matrix
18
+
19
+
20
def load_manual_overrides() -> dict[str, Any]:
    """Load ``public/data/manual-cfbd-overrides.json`` and return it unmodified.

    Per the upstream project, the file holds manual CFBD usage overrides
    keyed by ``"Name|POS"`` for players missing from CFBD's player-usage
    data; see ``scripts/backfill_cfbd_variants.py`` in the upstream repo.
    """
    overrides = fetch_json("public/data/manual-cfbd-overrides.json")
    return overrides
stathead/ktc.py ADDED
@@ -0,0 +1,66 @@
1
+ """KeepTradeCut dynasty values — current snapshot + daily history."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_ktc() -> pd.DataFrame:
    """Current KTC dynasty values, one row per record in the rankings file.

    Reads ``public/data/ktc_rankings_1qb.json``, whose records carry both
    the 1QB ``value`` and the ``superflexValue``, so one fetch covers both
    formats. ``playerID``, ``playerName`` and ``position`` are required in
    every record; the remaining fields are optional and default to ``None``
    (``isRookie`` defaults to ``False``).

    Columns: ``playerID``, ``name``, ``position``, ``positionRank``, ``team``,
    ``age``, ``value_1qb``, ``value_superflex``, ``isRookie``.
    """
    payload = fetch_json("public/data/ktc_rankings_1qb.json")
    records: list[dict] = []
    for entry in payload:
        record = {
            "playerID": entry["playerID"],
            "name": entry["playerName"],
            "position": entry["position"],
            "positionRank": entry.get("positionRank"),
            "team": entry.get("team"),
            "age": entry.get("age"),
            "value_1qb": entry.get("value"),
            "value_superflex": entry.get("superflexValue"),
            "isRookie": entry.get("isRookie", False),
        }
        records.append(record)
    return pd.DataFrame(records)
32
+
33
+
34
def load_ktc_history() -> pd.DataFrame:
    """KTC value history, one row per (player, 1QB history point).

    Rows are driven by each player's ``oneQB`` value history. The superflex
    value for the same date is attached when present and is ``None``
    otherwise. Name, position and team are looked up in the current
    rankings snapshot and are ``None`` for players absent from it.

    Columns: ``playerID``, ``name``, ``position``, ``team``, ``date``,
    ``value_1qb``, ``value_superflex``. ``date`` is converted with
    ``pandas.to_datetime`` unless the frame is empty.
    """
    snapshot = fetch_json("public/data/ktc_rankings_1qb.json")
    meta_by_id: dict = {}
    for rec in snapshot:
        pid = rec["playerID"]
        meta_by_id[pid] = {
            "name": rec["playerName"],
            "position": rec["position"],
            "team": rec.get("team"),
        }

    timeline = fetch_json("public/data/ktc_history.json")
    records: list[dict] = []
    for entry in timeline:
        player_id = entry["playerID"]
        meta = meta_by_id.get(player_id, {})
        sf_points = (entry.get("superflex") or {}).get("valueHistory") or []
        sf_by_date = {pt["d"]: pt["v"] for pt in sf_points}
        one_qb_points = (entry.get("oneQB") or {}).get("valueHistory") or []
        for pt in one_qb_points:
            records.append({
                "playerID": player_id,
                "name": meta.get("name"),
                "position": meta.get("position"),
                "team": meta.get("team"),
                "date": pt["d"],
                "value_1qb": pt["v"],
                "value_superflex": sf_by_date.get(pt["d"]),
            })

    frame = pd.DataFrame(records)
    if frame.empty:
        # An empty frame has no "date" column to convert.
        return frame
    frame["date"] = pd.to_datetime(frame["date"])
    return frame
@@ -0,0 +1,46 @@
1
+ """Rookie career model predictions — 2026 class + historical backtest."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
+ def _flatten(row: dict) -> dict:
10
+ """Lift ``features`` sub-dict into top-level columns."""
11
+ features = row.pop("features", None) or {}
12
+ return {**row, **features}
13
+
14
+
15
def load_career_predictions_2026() -> pd.DataFrame:
    """2026 draft-class career predictions, one row per entry.

    Reads the ``careerPredictions2026`` list from
    ``public/data/feature-matrix.json`` and lifts each entry's ``features``
    sub-dict into top-level columns. A missing or empty list yields an
    empty frame.

    Per the upstream project, columns include ``name``, ``position``,
    ``adp``, ``team``, ``predictedCareerPPG``, ``percentile``,
    ``modelTier``, ``boomProb``, ``bustProb``, ``boomZ``, ``bustZ`` and
    the model's input features.
    """
    matrix = fetch_json("public/data/feature-matrix.json")
    prospects = matrix.get("careerPredictions2026") or []
    flattened: list[dict] = []
    for prospect in prospects:
        # Copy first so the parsed JSON entry is left untouched.
        flattened.append(_flatten(dict(prospect)))
    return pd.DataFrame(flattened)
27
+
28
+
29
def load_career_backtest() -> pd.DataFrame:
    """Historical rookie backtest rows from the career model cache.

    Reads ``public/data/model-cache-career-v72.json`` and concatenates the
    ``backtestRows`` of every model under ``rookieCareerModels``. A row
    without its own ``position`` gets the key its model is stored under,
    and each row's ``features`` sub-dict is lifted into top-level columns.

    Per the upstream project, columns include ``name``, ``position``,
    ``draftSeason``, ``predictedPPG``, ``actualPPG``, ``percentile``,
    ``modelTier``, ``combinedScore`` and the training features.
    """
    payload = fetch_json("public/data/model-cache-career-v72.json")
    models = payload.get("rookieCareerModels") or {}
    records: list[dict] = []
    for pos, model in models.items():
        backtest_rows = model.get("backtestRows") or []
        for raw in backtest_rows:
            record = dict(raw)
            if "position" not in record:
                record["position"] = pos
            records.append(_flatten(record))
    return pd.DataFrame(records)
stathead/prospects.py ADDED
@@ -0,0 +1,18 @@
1
+ """2026 draft prospect scouting grades (NFL.com / PFN composite)."""
2
+ from __future__ import annotations
3
+
4
+ import pandas as pd
5
+
6
+ from ._fetch import fetch_json
7
+
8
+
9
def load_prospect_grades(year: int = 2026) -> pd.DataFrame:
    """Scouting grades for the draft class of ``year``.

    Reads ``src/data/prospect-grades-<year>.json`` from the repo and hands
    the parsed content to ``pandas.DataFrame``. Per the upstream project,
    only the 2026 file is currently published, with columns ``name``,
    ``pos``, ``school``, ``grade``, ``projRound``, ``projPick``, ``tier``.
    """
    source = "src/data/prospect-grades-{}.json".format(year)
    grades = fetch_json(source)
    return pd.DataFrame(grades)
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: stathead
3
+ Version: 0.1.0
4
+ Summary: Python client for the StatHead fantasy football model — rookie career predictions, historical ADP, dynasty values, and flattened feature matrices.
5
+ Project-URL: Homepage, https://github.com/dachhack/stathead
6
+ Project-URL: Source, https://github.com/dachhack/stathead
7
+ Project-URL: Issues, https://github.com/dachhack/stathead/issues
8
+ Author: StatHead
9
+ License: MIT
10
+ Keywords: adp,dynasty,fantasy-football,ktc,nfl,rookie-prospects
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
+ Requires-Python: >=3.9
18
+ Requires-Dist: pandas>=1.5
19
+ Provides-Extra: dev
20
+ Requires-Dist: duckdb>=0.10; extra == 'dev'
21
+ Requires-Dist: polars>=0.20; extra == 'dev'
22
+ Requires-Dist: pytest>=7; extra == 'dev'
23
+ Provides-Extra: duckdb
24
+ Requires-Dist: duckdb>=0.10; extra == 'duckdb'
25
+ Provides-Extra: polars
26
+ Requires-Dist: polars>=0.20; extra == 'polars'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # stathead
30
+
31
+ Python client for the [StatHead](https://github.com/dachhack/stathead) fantasy football model. Returns pandas DataFrames of rookie career predictions, historical ADP, KeepTradeCut dynasty values, and the flattened feature matrix used to train the models.
32
+
33
+ ## Install
34
+
35
+ ```bash
36
+ pip install stathead
37
+ ```
38
+
39
+ Optional extras:
40
+
41
+ ```bash
42
+ pip install "stathead[polars]" # also installs polars (loaders still return pandas DataFrames)
43
+ pip install "stathead[duckdb]" # for local SQL querying
44
+ ```
45
+
46
+ ## Quick start
47
+
48
+ ```python
49
+ import stathead as sh
50
+
51
+ # 2026 rookie class predictions (77 players × ~80 columns)
52
+ rookies = sh.load_career_predictions_2026()
53
+ rookies.nlargest(10, "percentile")[["name", "position", "predictedCareerPPG", "modelTier"]]
54
+
55
+ # Historical backtest — predicted vs actual for every drafted rookie 2010-2025
56
+ backtest = sh.load_career_backtest()
57
+ wr = backtest[backtest.position == "WR"]
58
+ wr.groupby("modelTier")[["actualPPG", "predictedPPG"]].mean()
59
+
60
+ # Historical ADP, every season fully populated
61
+ adp = sh.load_adp_historical()
62
+ adp[(adp.season == 2023) & (adp.adp <= 24)]
63
+
64
+ # Dynasty values
65
+ ktc = sh.load_ktc()
66
+ ktc.nlargest(25, "value_1qb")
67
+
68
+ # Daily KTC history (for momentum / trend analysis)
69
+ hist = sh.load_ktc_history()
70
+ hist.pivot_table(index="date", columns="name", values="value_1qb")
71
+ ```
72
+
73
+ ## Pinning to a specific version
74
+
75
+ Loaders resolve against the upstream GitHub repo. Pin to a commit SHA, tag,
76
+ or branch for reproducibility:
77
+
78
+ ```python
79
+ sh.pin_version("a6720e5") # or a tagged release
80
+ ```
81
+
82
+ Clear the local cache if you want to re-fetch:
83
+
84
+ ```python
85
+ sh.clear_cache()
86
+ ```
87
+
88
+ ## Data freshness
89
+
90
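+ Data files are cached under `~/.cache/stathead/<ref>/` (or the platform cache directory when `platformdirs` is installed) after the first
91
+ download. Subsequent runs read from disk — no network roundtrip, even if a branch ref such as `main` has since moved upstream. Delete the
92
+ cache directory or call `clear_cache()` to force a refresh.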
93
+
94
+ ## Available loaders
95
+
96
+ | Function | Returns | Shape |
97
+ |---|---|---|
98
+ | `load_career_predictions_2026()` | 2026 rookie predictions | ~77 × ~80 cols |
99
+ | `load_career_backtest()` | Historical rookies with pred + actual PPG | ~1087 × ~100 cols |
100
+ | `load_adp_historical()` | Model-training ADP 2010-2025 | 4507 × 10 |
101
+ | `load_adp_ffc(season=None)` | FFC PPR raw ADP (per season as fetched) | variable |
102
+ | `load_ktc()` | Current KTC dynasty values | ~500 × 9 |
103
+ | `load_ktc_history()` | Daily KTC history | ~100k × 7 |
104
+ | `load_prospect_grades(year=2026)` | Draft scouting grades | ~200 × 7 |
105
+ | `load_feature_matrix()` | Raw `feature-matrix.json` (dict) | — |
106
+ | `load_manual_overrides()` | Manual CFBD usage overrides (dict) | — |
107
+
108
+ ## Licensing & attribution
109
+
110
+ Package code is MIT-licensed. The data this package retrieves is derived
111
+ from the StatHead project's own modeling pipeline; upstream sources
112
+ (nflverse, FFC, KeepTradeCut, CFBD, etc.) retain their own terms — see
113
+ each source's license before redistributing. If you're building on these
114
+ predictions, a link back to the StatHead repo is appreciated but not
115
+ required.
116
+
117
+ ## Contributing
118
+
119
+ The package is small and focused — see
120
+ [`python/src/stathead/`](./src/stathead/) for the loader modules. Issues
121
+ and PRs welcome at the [main repo](https://github.com/dachhack/stathead).
@@ -0,0 +1,10 @@
1
+ stathead/__init__.py,sha256=X2NGm7sIIBHtkpvwQwOZBVMxJsSmX8z9J2x-GRTjBgg,1238
2
+ stathead/_fetch.py,sha256=XzCxKIRuZGUou3XQYdnPNhTBTP5hTZ71gLc6qKRnKLU,2539
3
+ stathead/adp.py,sha256=YFR9lRyRYBZ8E2_F8Wfn4ny8p184rLK6k5Am2dLR2gE,2533
4
+ stathead/features.py,sha256=Egsg9DcTHaHdGSEwbLZseP2VuMsA6U4A5PoQ2NFhnpM,996
5
+ stathead/ktc.py,sha256=2h99n3gd-YZIpiZrruFAmei-jw_dj_S5Li9kFcno34s,2355
6
+ stathead/predictions.py,sha256=M-ZbM5R2p0h53cJI2HyAYqSoFFf2FpyEzLxR_wD9EfA,1835
7
+ stathead/prospects.py,sha256=YptPFlUq9mNrfYuTml730IN4zJTH_2Yx_pQez-w1sws,546
8
+ stathead-0.1.0.dist-info/METADATA,sha256=ubGM2TQ7cXm84fmDIuR-zDkbS3ABT5O00J5g3T9V5Wc,4307
9
+ stathead-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
10
+ stathead-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any