spforge-0.8.4-py3-none-any.whl → spforge-0.8.18-py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Potentially problematic release: this version of spforge might be problematic.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/autopipeline.py +169 -5
- spforge/estimator/_group_by_estimator.py +11 -3
- spforge/features_generator_pipeline.py +8 -4
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/performance_transformers/_performance_manager.py +2 -4
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +142 -28
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/ratings/utils.py +16 -6
- spforge/scorer/_score.py +42 -11
- spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/METADATA +12 -19
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/RECORD +37 -31
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/WHEEL +1 -1
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/performance_transformers/test_performance_manager.py +15 -0
- tests/ratings/test_player_rating_generator.py +154 -0
- tests/ratings/test_player_rating_no_mutation.py +214 -0
- tests/ratings/test_utils_scaled_weights.py +136 -0
- tests/scorer/test_score.py +232 -0
- tests/test_autopipeline.py +336 -6
- tests/test_feature_generator_pipeline.py +43 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/top_level.txt +0 -0
spforge/ratings/league_start_rating_optimizer.py ADDED
@@ -0,0 +1,201 @@
+from __future__ import annotations
+
+import copy
+from dataclasses import dataclass
+
+import narwhals.stable.v2 as nw
+import polars as pl
+from narwhals.stable.v2.typing import IntoFrameT
+
+
+DEFAULT_START_RATING = 1000.0
+
+
+@dataclass
+class LeagueStartRatingOptimizationResult:
+    league_ratings: dict[str, float]
+    iteration_errors: list[dict[str, float]]
+
+
+class LeagueStartRatingOptimizer:
+    def __init__(
+        self,
+        rating_generator: object,
+        n_iterations: int = 3,
+        learning_rate: float = 0.2,
+        min_cross_region_rows: int = 10,
+        rating_scale: float | None = None,
+    ):
+        self.rating_generator = rating_generator
+        self.n_iterations = int(n_iterations)
+        self.learning_rate = float(learning_rate)
+        self.min_cross_region_rows = int(min_cross_region_rows)
+        self.rating_scale = rating_scale
+
+    @nw.narwhalify
+    def optimize(self, df: IntoFrameT) -> LeagueStartRatingOptimizationResult:
+        pl_df = df.to_native() if df.implementation.is_polars() else df.to_polars()
+        league_ratings = self._get_league_ratings(self.rating_generator)
+        iteration_errors: list[dict[str, float]] = []
+
+        for _ in range(self.n_iterations):
+            gen = copy.deepcopy(self.rating_generator)
+            self._set_league_ratings(gen, league_ratings)
+            self._ensure_prediction_columns(gen)
+
+            pred_df = gen.fit_transform(pl_df)
+            error_df = self._cross_region_error_df(pl_df, pred_df, gen)
+            if error_df.is_empty():
+                break
+
+            error_summary = (
+                error_df.group_by(self._league_column_name(gen))
+                .agg(
+                    pl.col("error").mean().alias("mean_error"),
+                    pl.len().alias("row_count"),
+                )
+                .to_dicts()
+            )
+            league_key = self._league_column_name(gen)
+            iteration_errors.append({r[league_key]: r["mean_error"] for r in error_summary})
+            league_ratings = self._apply_error_updates(
+                gen, league_ratings, error_summary, league_key
+            )
+
+        self._set_league_ratings(self.rating_generator, league_ratings)
+        return LeagueStartRatingOptimizationResult(
+            league_ratings=league_ratings, iteration_errors=iteration_errors
+        )
+
+    def _cross_region_error_df(
+        self,
+        df: pl.DataFrame,
+        pred_df: pl.DataFrame,
+        rating_generator: object,
+    ) -> pl.DataFrame:
+        column_names = getattr(rating_generator, "column_names", None)
+        if column_names is None:
+            raise ValueError("rating_generator must define column_names")
+
+        match_id = getattr(column_names, "match_id", None)
+        team_id = getattr(column_names, "team_id", None)
+        league_col = getattr(column_names, "league", None)
+        if not match_id or not team_id or not league_col:
+            raise ValueError("column_names must include match_id, team_id, and league")
+
+        pred_col, entity_cols, perf_col = self._prediction_spec(rating_generator)
+        base_cols = [match_id, team_id, league_col, perf_col]
+        for col in base_cols + entity_cols:
+            if col not in df.columns:
+                raise ValueError(f"{col} missing from input dataframe")
+
+        join_cols = [match_id, team_id] + entity_cols
+        joined = df.select(base_cols + entity_cols).join(
+            pred_df.select(join_cols + [pred_col]),
+            on=join_cols,
+            how="inner",
+        )
+        opp_league = self._opponent_mode_league(joined, match_id, team_id, league_col)
+        enriched = joined.join(opp_league, on=[match_id, team_id], how="left").with_columns(
+            (pl.col(perf_col) - pl.col(pred_col)).alias("error")
+        )
+        return enriched.filter(pl.col("opp_mode_league").is_not_null()).filter(
+            pl.col(league_col) != pl.col("opp_mode_league")
+        )
+
+    def _opponent_mode_league(
+        self, df: pl.DataFrame, match_id: str, team_id: str, league_col: str
+    ) -> pl.DataFrame:
+        team_mode = (
+            df.group_by([match_id, team_id, league_col])
+            .agg(pl.len().alias("__count"))
+            .sort(["__count"], descending=True)
+            .unique([match_id, team_id])
+            .select([match_id, team_id, league_col])
+            .rename({league_col: "team_mode_league"})
+        )
+        opponents = (
+            team_mode.join(team_mode, on=match_id, suffix="_opp")
+            .filter(pl.col(team_id) != pl.col(f"{team_id}_opp"))
+            .group_by([match_id, team_id, "team_mode_league_opp"])
+            .agg(pl.len().alias("__count"))
+            .sort(["__count"], descending=True)
+            .unique([match_id, team_id])
+            .select([match_id, team_id, "team_mode_league_opp"])
+            .rename({"team_mode_league_opp": "opp_mode_league"})
+        )
+        return opponents
+
+    def _prediction_spec(self, rating_generator: object) -> tuple[str, list[str], str]:
+        perf_col = getattr(rating_generator, "performance_column", None)
+        if not perf_col:
+            raise ValueError("rating_generator must define performance_column")
+        if hasattr(rating_generator, "PLAYER_PRED_PERF_COL"):
+            pred_col = rating_generator.PLAYER_PRED_PERF_COL
+            column_names = rating_generator.column_names
+            player_id = getattr(column_names, "player_id", None)
+            if not player_id:
+                raise ValueError("column_names must include player_id for player ratings")
+            return pred_col, [player_id], perf_col
+        if hasattr(rating_generator, "TEAM_PRED_OFF_PERF_COL"):
+            pred_col = rating_generator.TEAM_PRED_OFF_PERF_COL
+            return pred_col, [], perf_col
+        raise ValueError("rating_generator must expose a predicted performance column")
+
+    def _ensure_prediction_columns(self, rating_generator: object) -> None:
+        pred_cols: list[str] = []
+        if hasattr(rating_generator, "PLAYER_PRED_PERF_COL"):
+            pred_cols.append(rating_generator.PLAYER_PRED_PERF_COL)
+        elif hasattr(rating_generator, "TEAM_PRED_OFF_PERF_COL"):
+            pred_cols.append(rating_generator.TEAM_PRED_OFF_PERF_COL)
+
+        if not pred_cols:
+            return
+
+        existing = list(getattr(rating_generator, "non_predictor_features_out", []) or [])
+        for col in pred_cols:
+            if col not in existing:
+                existing.append(col)
+        rating_generator.non_predictor_features_out = existing
+
+    def _apply_error_updates(
+        self,
+        rating_generator: object,
+        league_ratings: dict[str, float],
+        error_summary: list[dict[str, float]],
+        league_key: str,
+    ) -> dict[str, float]:
+        scale = self.rating_scale
+        if scale is None:
+            scale = getattr(rating_generator, "rating_change_multiplier_offense", 1.0)
+
+        updated = dict(league_ratings)
+        for row in error_summary:
+            if row["row_count"] < self.min_cross_region_rows:
+                continue
+            league = row[league_key]
+            mean_error = row["mean_error"]
+            base_rating = updated.get(league, DEFAULT_START_RATING)
+            updated[league] = base_rating + self.learning_rate * mean_error * scale
+        return updated
+
+    def _league_column_name(self, rating_generator: object) -> str:
+        column_names = getattr(rating_generator, "column_names", None)
+        league_col = getattr(column_names, "league", None)
+        if not league_col:
+            raise ValueError("column_names must include league for league adjustments")
+        return league_col
+
+    def _get_league_ratings(self, rating_generator: object) -> dict[str, float]:
+        start_gen = getattr(rating_generator, "start_rating_generator", None)
+        if start_gen is None or not hasattr(start_gen, "league_ratings"):
+            raise ValueError("rating_generator must define start_rating_generator.league_ratings")
+        return dict(start_gen.league_ratings)
+
+    def _set_league_ratings(self, rating_generator: object, league_ratings: dict[str, float]) -> None:
+        start_gen = getattr(rating_generator, "start_rating_generator", None)
+        if start_gen is None or not hasattr(start_gen, "league_ratings"):
+            raise ValueError("rating_generator must define start_rating_generator.league_ratings")
+        start_gen.league_ratings = dict(league_ratings)
+        if hasattr(rating_generator, "start_league_ratings"):
+            rating_generator.start_league_ratings = dict(league_ratings)
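The heart of the new optimizer is the update rule in `_apply_error_updates`: leagues whose entities systematically over-perform their predictions in cross-league matches get their start rating nudged upward by `learning_rate * mean_error * scale`. A self-contained restatement of that rule; the league names and numbers below are illustrative, not from the package:

```python
DEFAULT_START_RATING = 1000.0

def apply_error_updates(league_ratings, error_summary, learning_rate=0.2, scale=50.0, min_rows=10):
    # Mirror of LeagueStartRatingOptimizer._apply_error_updates: skip leagues
    # with too few cross-region rows, then move each remaining league's start
    # rating in the direction of its mean prediction error.
    updated = dict(league_ratings)
    for row in error_summary:
        if row["row_count"] < min_rows:
            continue
        base = updated.get(row["league"], DEFAULT_START_RATING)
        updated[row["league"]] = base + learning_rate * row["mean_error"] * scale
    return updated

print(apply_error_updates(
    {"LCK": 1000.0, "LPL": 1000.0},
    [
        {"league": "LCK", "mean_error": 0.08, "row_count": 40},  # updated
        {"league": "LPL", "mean_error": -0.30, "row_count": 5},  # skipped: too few rows
    ],
))
# {'LCK': 1000.8, 'LPL': 1000.0}
```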
spforge/ratings/start_rating_generator.py CHANGED
@@ -28,7 +28,7 @@ class StartRatingGenerator:
         min_count_for_percentiles: int = 50,
         team_rating_subtract: float = 80,
         team_weight: float = 0,
-        max_days_ago_league_entities: int =
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):
spforge/ratings/team_start_rating_generator.py CHANGED
@@ -24,7 +24,7 @@ class TeamStartRatingGenerator:
         league_ratings: dict[str, float] | None = None,
         league_quantile: float = 0.2,
         min_count_for_percentiles: int = 50,
-        max_days_ago_league_entities: int =
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):
spforge/ratings/utils.py CHANGED
@@ -2,6 +2,10 @@ import polars as pl
 
 from spforge.data_structures import ColumnNames
 
+# Internal column names for scaled participation weights
+_SCALED_PW = "__scaled_participation_weight__"
+_SCALED_PPW = "__scaled_projected_participation_weight__"
+
 
 def add_team_rating(
     df: pl.DataFrame,
@@ -46,11 +50,14 @@ def add_team_rating_projected(
     tid = column_names.team_id
     ppw = column_names.projected_participation_weight
 
-    if
+    # Use scaled column if available (clipped to [0, 1]), otherwise raw column
+    weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
+
+    if weight_col and weight_col in df.columns:
         return df.with_columns(
             (
-                (pl.col(
-                / pl.col(
+                (pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
+                / pl.col(weight_col).sum().over([mid, tid])
             ).alias(team_rating_out)
         )
 
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
     mid = column_names.match_id
     ppw = column_names.projected_participation_weight
 
-    if
+    # Use scaled column if available (clipped to [0, 1]), otherwise raw column
+    weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
+
+    if weight_col and weight_col in df.columns:
         return df.with_columns(
             (
-                (pl.col(
-                / pl.col(
+                (pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
+                / pl.col(weight_col).sum().over(mid)
             ).alias(rating_mean_out)
        )
 
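Both projected-rating helpers now compute the same participation-weighted mean, just over different windows (per match-and-team vs. per match). A standalone polars sketch of the team-level version; column names and values are illustrative:

```python
import polars as pl

_SCALED_PPW = "__scaled_projected_participation_weight__"

df = pl.DataFrame({
    "match_id": [1, 1, 1, 1],
    "team_id": ["A", "A", "B", "B"],
    "player_rating": [1100.0, 900.0, 1000.0, 1050.0],
    _SCALED_PPW: [0.75, 0.25, 0.5, 0.5],
})

# Prefer the internal scaled-weight column when present, as the diff does.
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else "projected_participation_weight"

df = df.with_columns(
    (
        (pl.col(weight_col) * pl.col("player_rating")).sum().over(["match_id", "team_id"])
        / pl.col(weight_col).sum().over(["match_id", "team_id"])
    ).alias("team_rating_projected")
)
print(df)  # team A: 0.75 * 1100 + 0.25 * 900 = 1050.0; team B: 1025.0
```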
spforge/scorer/_score.py CHANGED
@@ -366,18 +366,49 @@ class PWMSE(BaseScorer):
         self.labels = labels
         self.evaluation_labels = evaluation_labels
 
+        self._needs_extension = False
+        self._needs_slicing = False
         self._eval_indices: list[int] | None = None
+        self._extension_mapping: dict[int, int] | None = None
+
         if self.evaluation_labels is not None and self.labels is not None:
-
-
+            training_set = set(self.labels)
+            eval_set = set(self.evaluation_labels)
+
+            if eval_set <= training_set:
+                self._needs_slicing = True
+                label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
+                self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
+            elif training_set <= eval_set:
+                self._needs_extension = True
+                eval_label_to_idx = {lbl: i for i, lbl in enumerate(self.evaluation_labels)}
+                self._extension_mapping = {
+                    train_idx: eval_label_to_idx[lbl]
+                    for train_idx, lbl in enumerate(self.labels)
+                }
+            else:
+                raise ValueError(
+                    f"evaluation_labels must be a subset or superset of labels. "
+                    f"labels={self.labels}, evaluation_labels={self.evaluation_labels}"
+                )
+
+    def _align_predictions(self, preds: np.ndarray) -> np.ndarray:
+        if self._needs_slicing and self._eval_indices is not None:
+            sliced = preds[:, self._eval_indices]
+            row_sums = sliced.sum(axis=1, keepdims=True)
+            row_sums = np.where(row_sums == 0, 1.0, row_sums)
+            return sliced / row_sums
+
+        if self._needs_extension and self._extension_mapping is not None:
+            n_samples = preds.shape[0]
+            n_eval_labels = len(self.evaluation_labels)
+            extended = np.full((n_samples, n_eval_labels), 1e-5, dtype=np.float64)
+            for train_idx, eval_idx in self._extension_mapping.items():
+                extended[:, eval_idx] = preds[:, train_idx]
+            row_sums = extended.sum(axis=1, keepdims=True)
+            return extended / row_sums
 
-
-        if self._eval_indices is None:
-            return preds
-        sliced = preds[:, self._eval_indices]
-        row_sums = sliced.sum(axis=1, keepdims=True)
-        row_sums = np.where(row_sums == 0, 1.0, row_sums)
-        return sliced / row_sums
+        return preds
 
     def _get_scoring_labels(self) -> list[int]:
         if self.evaluation_labels is not None:
@@ -446,7 +477,7 @@ class PWMSE(BaseScorer):
 
         targets = gran_df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self.
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(
@@ -464,7 +495,7 @@ class PWMSE(BaseScorer):
 
         targets = df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self.
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(
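The new `_align_predictions` replaces the old slice-only helper with two symmetric cases: slice-and-renormalize when the evaluation labels are a subset of the training labels, pad-with-epsilon-and-renormalize when they are a superset. A standalone numpy illustration of both; the label sets are chosen for the example:

```python
import numpy as np

preds = np.array([[0.2, 0.5, 0.3]])  # probabilities for training labels [0, 1, 2]

# Subset case: evaluation_labels = [1, 2] -> slice those columns, renormalize rows.
sliced = preds[:, [1, 2]]
sliced = sliced / sliced.sum(axis=1, keepdims=True)
print(sliced)  # [[0.625 0.375]]

# Superset case: evaluation_labels = [0, 1, 2, 3] -> the unseen label 3 gets the
# 1e-5 floor, then rows are renormalized.
extended = np.full((1, 4), 1e-5)
extended[:, :3] = preds
extended = extended / extended.sum(axis=1, keepdims=True)
print(extended)  # approximately [[0.2 0.5 0.3 1e-05]]
```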
spforge/transformers/_other_transformer.py CHANGED
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
 
 
 class GroupByReducer(BaseEstimator, TransformerMixin):
-    def __init__(self, granularity: list[str]):
+    def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
         self.granularity = granularity
+        self.aggregation_weight = aggregation_weight
 
     @nw.narwhalify
     def fit(self, X: IntoFrameT, y: Any = None):
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
             raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
 
         non_keys = [c for c in df.columns if c not in keys]
-
+        schema = df.schema
+        num_cols = [c for c in non_keys if schema[c].is_numeric()]
         other_cols = [c for c in non_keys if c not in num_cols]
 
         aggs: list[nw.Expr] = []
 
+        # Backwards compatibility: old pickled objects may not have aggregation_weight
+        weight_col = getattr(self, "aggregation_weight", None)
+        has_weight = weight_col and weight_col in df.columns
+
         for c in num_cols:
-
+            if c == weight_col:
+                aggs.append(nw.col(c).sum().alias(c))
+            elif has_weight:
+                aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
+                aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
+            else:
+                aggs.append(nw.col(c).mean().alias(c))
 
         for c in other_cols:
             aggs.append(nw.col(c).first().alias(c))
 
+        if has_weight:
+            aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
+
         out = df.group_by(keys).agg(aggs)
+
+        if has_weight:
+            weighted_cols = [c for c in num_cols if c != weight_col]
+            for c in weighted_cols:
+                out = out.with_columns(
+                    nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
+                    .then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
+                    .otherwise(nw.col(f"__{c}_fallback"))
+                    .alias(c)
+                )
+            drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
+            drop_cols += [f"__{c}_fallback" for c in weighted_cols]
+            drop_cols.append("__weight_sum")
+            out = out.drop(drop_cols)
+
         return out
 
     @nw.narwhalify
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
         if sample_weight is not None:
             df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
 
-
+        y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
+        non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
+        if len(non_uniform) > 0:
+            raise ValueError("Target (y) must be uniform within each granularity group")
 
-
-            agg_exprs = [nw.col("__y").mean().alias("__y")]
-        else:
-            agg_exprs = [nw.col("__y").first().alias("__y")]
+        agg_exprs = [nw.col("__y").first().alias("__y")]
 
         if sample_weight is not None:
             agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
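A standalone polars sketch of what `GroupByReducer` now does when `aggregation_weight` is set: numeric columns become weighted means, the weight column itself is summed, and groups whose weight sum is zero fall back to a plain mean. Data and column names are illustrative:

```python
import polars as pl

df = pl.DataFrame({
    "game_id": [1, 1, 2, 2],
    "team_id": ["A", "A", "B", "B"],
    "points": [30.0, 10.0, 20.0, 40.0],
    "minutes": [30.0, 10.0, 0.0, 0.0],  # weight column; game 2 sums to zero
})

out = (
    df.group_by(["game_id", "team_id"])
    .agg(
        (pl.col("points") * pl.col("minutes")).sum().alias("__points_weighted_sum"),
        pl.col("points").mean().alias("__points_fallback"),
        pl.col("minutes").sum().alias("__weight_sum"),
    )
    .with_columns(
        pl.when(pl.col("__weight_sum") != 0)
        .then(pl.col("__points_weighted_sum") / pl.col("__weight_sum"))
        .otherwise(pl.col("__points_fallback"))
        .alias("points")
    )
    .drop("__points_weighted_sum", "__points_fallback", "__weight_sum")
)
print(out.sort("game_id"))
# game 1: weighted mean (30*30 + 10*10) / 40 = 25.0; game 2: fallback mean 30.0
```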
{spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.4
+Version: 0.8.18
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.23.4
 Requires-Dist: optuna>=3.4.0
-Requires-Dist: pandas
+Requires-Dist: pandas<3.0.0,>=2.0.0
 Requires-Dist: pendulum>=1.0.0
 Requires-Dist: scikit-learn>=1.4.0
 Requires-Dist: lightgbm>=4.0.0
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 
+from examples import get_sub_sample_nba_data
 from spforge.autopipeline import AutoPipeline
 from spforge.data_structures import ColumnNames
-from spforge.ratings import RatingKnownFeatures
-from spforge.ratings._player_rating import PlayerRatingGenerator
+from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
 
-df =
+df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
 
 # Step 1: Define column mappings for your dataset
 column_names = ColumnNames(
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
 pipeline = AutoPipeline(
     estimator=LogisticRegression(),
     granularity=["game_id", "team_id"], # Aggregate players → teams
-
+    estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
 )
 
 # Train on historical data
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
     prediction_column_name="points_pred",
     target_column="points",
     n_splits=3, # Number of temporal folds
-    # Must include both
-    features=pipeline.
+    # Must include both estimator features and context features
+    features=pipeline.required_features,
 )
 
 # Generate validation predictions
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
 - `is_validation=1` marks validation rows, `is_validation=0` marks training rows
 - Use `validation_column` in scorer to score only validation rows
 - Training data always comes BEFORE validation data chronologically
-- Must pass
+- Must pass all required features (use `pipeline.required_features`)
 - Scorers can filter rows (e.g., only score players who played minutes > 0)
 
 See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
 # Approach 1: LGBMClassifier (direct probability prediction)
 pipeline_classifier = AutoPipeline(
     estimator=LGBMClassifier(verbose=-100, random_state=42),
-
+    estimator_features=features_pipeline.features_out,
 )
 
 # Approach 2: LGBMRegressor + NegativeBinomialEstimator
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(
 
 pipeline_negbin = AutoPipeline(
     estimator=distribution_estimator,
-
-    context_feature_names=[
-        column_names.player_id,
-        column_names.start_date,
-        column_names.team_id,
-        column_names.match_id,
-    ],
+    estimator_features=features_pipeline.features_out,
     predictor_transformers=[
         EstimatorTransformer(
             prediction_column_name="points_estimate",
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
 # Stage 2: Refine estimate using Stage 1 output
 player_points_pipeline = AutoPipeline(
     estimator=LGBMRegressor(verbose=-100, n_estimators=50),
-
+    estimator_features=features_pipeline.features_out, # Original features
     # predictor_transformers execute first, adding their predictions
     predictor_transformers=[points_estimate_transformer],
 )
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
 - **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
 - **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
 - **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
-
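Taken together, the README edits converge on one pattern: `AutoPipeline` takes an explicit `estimator_features` list (the removed `context_feature_names` block is gone), and downstream cross-validators consume `pipeline.required_features`. A hedged sketch of that pattern; the feature names are placeholders, not taken from the diff:

```python
from sklearn.linear_model import LogisticRegression

from spforge.autopipeline import AutoPipeline

pipeline = AutoPipeline(
    estimator=LogisticRegression(),
    granularity=["game_id", "team_id"],
    estimator_features=["player_rating", "location"],  # placeholder feature names
)
# Cross-validators should now be fed pipeline.required_features instead of a
# hand-maintained list of estimator + context features.
```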