spforge 0.8.10__tar.gz → 0.8.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.10/spforge.egg-info → spforge-0.8.13}/PKG-INFO +1 -1
- {spforge-0.8.10 → spforge-0.8.13}/pyproject.toml +1 -1
- {spforge-0.8.10 → spforge-0.8.13}/spforge/autopipeline.py +92 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_player_rating.py +68 -20
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/utils.py +16 -6
- {spforge-0.8.10 → spforge-0.8.13/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/SOURCES.txt +2 -0
- spforge-0.8.13/tests/ratings/test_player_rating_no_mutation.py +214 -0
- spforge-0.8.13/tests/ratings/test_utils_scaled_weights.py +136 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/test_autopipeline.py +141 -0
- {spforge-0.8.10 → spforge-0.8.13}/LICENSE +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/MANIFEST.in +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/README.md +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/game_level_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/lol/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/setup.cfg +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/data_structures.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_group_by_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/scorer/_score.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_other_transformer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge/utils.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_player_rating_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.10
|
|
3
|
+
Version: 0.8.13
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.10"
|
|
7
|
+
version = "0.8.13"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
|
|
|
195
195
|
return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))
|
|
196
196
|
|
|
197
197
|
|
|
198
|
+
def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
|
|
199
|
+
"""Recursively find innermost estimator with feature_importances_ or coef_."""
|
|
200
|
+
if hasattr(estimator, "feature_importances_"):
|
|
201
|
+
inner = _get_importance_estimator_inner(estimator)
|
|
202
|
+
if inner is not None:
|
|
203
|
+
return inner
|
|
204
|
+
return (estimator, "feature_importances_")
|
|
205
|
+
|
|
206
|
+
if hasattr(estimator, "coef_"):
|
|
207
|
+
inner = _get_importance_estimator_inner(estimator)
|
|
208
|
+
if inner is not None:
|
|
209
|
+
return inner
|
|
210
|
+
return (estimator, "coef_")
|
|
211
|
+
|
|
212
|
+
return _get_importance_estimator_inner(estimator)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
|
|
216
|
+
"""Check wrapped estimators for importance attributes."""
|
|
217
|
+
# Check estimator_ (sklearn fitted wrapper convention)
|
|
218
|
+
if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
|
|
219
|
+
result = _get_importance_estimator(estimator.estimator_)
|
|
220
|
+
if result is not None:
|
|
221
|
+
return result
|
|
222
|
+
|
|
223
|
+
# Check _est (GroupByEstimator convention)
|
|
224
|
+
if hasattr(estimator, "_est") and estimator._est is not None:
|
|
225
|
+
result = _get_importance_estimator(estimator._est)
|
|
226
|
+
if result is not None:
|
|
227
|
+
return result
|
|
228
|
+
|
|
229
|
+
return None
|
|
230
|
+
|
|
231
|
+
|
|
198
232
|
class AutoPipeline(BaseEstimator):
|
|
199
233
|
def __init__(
|
|
200
234
|
self,
|
|
@@ -627,3 +661,61 @@ class AutoPipeline(BaseEstimator):
|
|
|
627
661
|
all_features.append(ctx)
|
|
628
662
|
|
|
629
663
|
return all_features
|
|
664
|
+
|
|
665
|
+
def _get_estimator_feature_names(self) -> list[str]:
|
|
666
|
+
"""Get feature names as seen by the final estimator after all transformations."""
|
|
667
|
+
pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
|
|
668
|
+
|
|
669
|
+
# Remove context columns dropped by "final" step
|
|
670
|
+
final_step = self.sklearn_pipeline.named_steps["final"]
|
|
671
|
+
drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
|
|
672
|
+
features = [f for f in pre_out if f not in drop_cols]
|
|
673
|
+
|
|
674
|
+
# Remove granularity columns (dropped by GroupByEstimator)
|
|
675
|
+
granularity_set = set(self.granularity)
|
|
676
|
+
features = [f for f in features if f not in granularity_set]
|
|
677
|
+
|
|
678
|
+
# Remove context features (used by wrapper estimators, not inner model)
|
|
679
|
+
context_set = set(self.context_feature_names)
|
|
680
|
+
features = [f for f in features if f not in context_set]
|
|
681
|
+
|
|
682
|
+
return features
|
|
683
|
+
|
|
684
|
+
@property
|
|
685
|
+
def feature_importances_(self) -> pd.DataFrame:
|
|
686
|
+
"""Get feature importances from the fitted estimator.
|
|
687
|
+
|
|
688
|
+
Returns a DataFrame with columns ["feature", "importance"] sorted by
|
|
689
|
+
absolute importance descending. Works with tree-based models
|
|
690
|
+
(feature_importances_) and linear models (coef_).
|
|
691
|
+
"""
|
|
692
|
+
if self.sklearn_pipeline is None:
|
|
693
|
+
raise RuntimeError("Pipeline not fitted. Call fit() first.")
|
|
694
|
+
|
|
695
|
+
est = self.sklearn_pipeline.named_steps["est"]
|
|
696
|
+
result = _get_importance_estimator(est)
|
|
697
|
+
|
|
698
|
+
if result is None:
|
|
699
|
+
raise RuntimeError(
|
|
700
|
+
"Estimator does not support feature importances. "
|
|
701
|
+
"Requires feature_importances_ or coef_ attribute."
|
|
702
|
+
)
|
|
703
|
+
|
|
704
|
+
inner_est, attr_name = result
|
|
705
|
+
raw = getattr(inner_est, attr_name)
|
|
706
|
+
|
|
707
|
+
if attr_name == "coef_":
|
|
708
|
+
# Linear models: use absolute value of coefficients
|
|
709
|
+
if raw.ndim == 2:
|
|
710
|
+
# Multi-class: average absolute values across classes
|
|
711
|
+
importances = np.abs(raw).mean(axis=0)
|
|
712
|
+
else:
|
|
713
|
+
importances = np.abs(raw)
|
|
714
|
+
else:
|
|
715
|
+
importances = raw
|
|
716
|
+
|
|
717
|
+
feature_names = self._get_estimator_feature_names()
|
|
718
|
+
|
|
719
|
+
df = pd.DataFrame({"feature": feature_names, "importance": importances})
|
|
720
|
+
df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
|
|
721
|
+
return df
|
|
@@ -34,6 +34,8 @@ from spforge.ratings.utils import (
|
|
|
34
34
|
from spforge.feature_generator._utils import to_polars
|
|
35
35
|
|
|
36
36
|
PLAYER_STATS = "__PLAYER_STATS"
|
|
37
|
+
_SCALED_PW = "__scaled_participation_weight__"
|
|
38
|
+
_SCALED_PPW = "__scaled_projected_participation_weight__"
|
|
37
39
|
|
|
38
40
|
|
|
39
41
|
class PlayerRatingGenerator(RatingGenerator):
|
|
@@ -273,6 +275,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
273
275
|
self._projected_participation_weight_max = self._participation_weight_max
|
|
274
276
|
|
|
275
277
|
def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
278
|
+
"""Create internal scaled participation weight columns without mutating originals."""
|
|
276
279
|
if not self.scale_participation_weights:
|
|
277
280
|
return df
|
|
278
281
|
if self._participation_weight_max is None or self._participation_weight_max <= 0:
|
|
@@ -287,7 +290,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
287
290
|
df = df.with_columns(
|
|
288
291
|
(pl.col(cn.participation_weight) / denom)
|
|
289
292
|
.clip(0.0, 1.0)
|
|
290
|
-
.alias(cn.participation_weight)
|
|
293
|
+
.alias(_SCALED_PW)
|
|
291
294
|
)
|
|
292
295
|
|
|
293
296
|
if (
|
|
@@ -300,16 +303,38 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
300
303
|
df = df.with_columns(
|
|
301
304
|
(pl.col(cn.projected_participation_weight) / denom)
|
|
302
305
|
.clip(0.0, 1.0)
|
|
303
|
-
.alias(cn.projected_participation_weight)
|
|
306
|
+
.alias(_SCALED_PPW)
|
|
304
307
|
)
|
|
305
308
|
|
|
306
309
|
return df
|
|
307
310
|
|
|
311
|
+
def _get_participation_weight_col(self) -> str:
|
|
312
|
+
"""Get the column name to use for participation weight (scaled if available)."""
|
|
313
|
+
cn = self.column_names
|
|
314
|
+
if self.scale_participation_weights and cn and cn.participation_weight:
|
|
315
|
+
return _SCALED_PW
|
|
316
|
+
return cn.participation_weight if cn else ""
|
|
317
|
+
|
|
318
|
+
def _get_projected_participation_weight_col(self) -> str:
|
|
319
|
+
"""Get the column name to use for projected participation weight (scaled if available)."""
|
|
320
|
+
cn = self.column_names
|
|
321
|
+
if self.scale_participation_weights and cn and cn.projected_participation_weight:
|
|
322
|
+
return _SCALED_PPW
|
|
323
|
+
return cn.projected_participation_weight if cn else ""
|
|
324
|
+
|
|
325
|
+
def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
326
|
+
"""Remove internal scaled columns before returning."""
|
|
327
|
+
cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
|
|
328
|
+
if cols_to_drop:
|
|
329
|
+
df = df.drop(cols_to_drop)
|
|
330
|
+
return df
|
|
331
|
+
|
|
308
332
|
def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
309
333
|
df = self._scale_participation_weight_columns(df)
|
|
310
334
|
match_df = self._create_match_df(df)
|
|
311
335
|
ratings = self._calculate_ratings(match_df)
|
|
312
336
|
|
|
337
|
+
# Keep scaled columns for now - they're needed by _add_rating_features
|
|
313
338
|
cols = [
|
|
314
339
|
c
|
|
315
340
|
for c in df.columns
|
|
@@ -329,13 +354,15 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
329
354
|
on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
|
|
330
355
|
)
|
|
331
356
|
|
|
332
|
-
|
|
357
|
+
result = self._add_rating_features(df)
|
|
358
|
+
return self._remove_internal_scaled_columns(result)
|
|
333
359
|
|
|
334
360
|
def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
335
361
|
df = self._scale_participation_weight_columns(df)
|
|
336
362
|
match_df = self._create_match_df(df)
|
|
337
363
|
ratings = self._calculate_future_ratings(match_df)
|
|
338
364
|
|
|
365
|
+
# Keep scaled columns for now - they're needed by _add_rating_features
|
|
339
366
|
cols = [
|
|
340
367
|
c
|
|
341
368
|
for c in df.columns
|
|
@@ -360,7 +387,8 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
360
387
|
how="left",
|
|
361
388
|
)
|
|
362
389
|
|
|
363
|
-
|
|
390
|
+
result = self._add_rating_features(df_with_ratings)
|
|
391
|
+
return self._remove_internal_scaled_columns(result)
|
|
364
392
|
|
|
365
393
|
def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
|
|
366
394
|
cn = self.column_names
|
|
@@ -796,9 +824,13 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
796
824
|
|
|
797
825
|
if cn.participation_weight and cn.participation_weight in df.columns:
|
|
798
826
|
player_stat_cols.append(cn.participation_weight)
|
|
827
|
+
if _SCALED_PW in df.columns:
|
|
828
|
+
player_stat_cols.append(_SCALED_PW)
|
|
799
829
|
|
|
800
830
|
if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
|
|
801
831
|
player_stat_cols.append(cn.projected_participation_weight)
|
|
832
|
+
if _SCALED_PPW in df.columns:
|
|
833
|
+
player_stat_cols.append(_SCALED_PPW)
|
|
802
834
|
|
|
803
835
|
if cn.position and cn.position in df.columns:
|
|
804
836
|
player_stat_cols.append(cn.position)
|
|
@@ -854,14 +886,23 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
854
886
|
position = team_player.get(cn.position)
|
|
855
887
|
player_league = team_player.get(cn.league, None)
|
|
856
888
|
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
team_player.get(cn.
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
889
|
+
# Use scaled participation weight if available, otherwise use original
|
|
890
|
+
if _SCALED_PW in team_player:
|
|
891
|
+
participation_weight = team_player.get(_SCALED_PW, 1.0)
|
|
892
|
+
elif cn.participation_weight:
|
|
893
|
+
participation_weight = team_player.get(cn.participation_weight, 1.0)
|
|
894
|
+
else:
|
|
895
|
+
participation_weight = 1.0
|
|
896
|
+
|
|
897
|
+
# Use scaled projected participation weight if available, otherwise use original
|
|
898
|
+
if _SCALED_PPW in team_player:
|
|
899
|
+
projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
|
|
900
|
+
elif cn.projected_participation_weight:
|
|
901
|
+
projected_participation_weight = team_player.get(
|
|
902
|
+
cn.projected_participation_weight, participation_weight
|
|
903
|
+
)
|
|
904
|
+
else:
|
|
905
|
+
projected_participation_weight = participation_weight
|
|
865
906
|
projected_participation_weights.append(projected_participation_weight)
|
|
866
907
|
|
|
867
908
|
perf_val = (
|
|
@@ -1087,14 +1128,21 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1087
1128
|
position = tp.get(cn.position)
|
|
1088
1129
|
league = tp.get(cn.league, None)
|
|
1089
1130
|
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
tp.get(cn.
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1131
|
+
# Use scaled participation weight if available, otherwise use original
|
|
1132
|
+
if _SCALED_PW in tp:
|
|
1133
|
+
pw = tp.get(_SCALED_PW, 1.0)
|
|
1134
|
+
elif cn.participation_weight:
|
|
1135
|
+
pw = tp.get(cn.participation_weight, 1.0)
|
|
1136
|
+
else:
|
|
1137
|
+
pw = 1.0
|
|
1138
|
+
|
|
1139
|
+
# Use scaled projected participation weight if available, otherwise use original
|
|
1140
|
+
if _SCALED_PPW in tp:
|
|
1141
|
+
ppw = tp.get(_SCALED_PPW, pw)
|
|
1142
|
+
elif cn.projected_participation_weight:
|
|
1143
|
+
ppw = tp.get(cn.projected_participation_weight, pw)
|
|
1144
|
+
else:
|
|
1145
|
+
ppw = pw
|
|
1098
1146
|
proj_w.append(float(ppw))
|
|
1099
1147
|
|
|
1100
1148
|
mp = MatchPerformance(
|
|
@@ -2,6 +2,10 @@ import polars as pl
|
|
|
2
2
|
|
|
3
3
|
from spforge.data_structures import ColumnNames
|
|
4
4
|
|
|
5
|
+
# Internal column names for scaled participation weights
|
|
6
|
+
_SCALED_PW = "__scaled_participation_weight__"
|
|
7
|
+
_SCALED_PPW = "__scaled_projected_participation_weight__"
|
|
8
|
+
|
|
5
9
|
|
|
6
10
|
def add_team_rating(
|
|
7
11
|
df: pl.DataFrame,
|
|
@@ -46,11 +50,14 @@ def add_team_rating_projected(
|
|
|
46
50
|
tid = column_names.team_id
|
|
47
51
|
ppw = column_names.projected_participation_weight
|
|
48
52
|
|
|
49
|
-
if ppw and ppw in df.columns:
|
|
53
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
54
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
55
|
+
|
|
56
|
+
if weight_col and weight_col in df.columns:
|
|
50
57
|
return df.with_columns(
|
|
51
58
|
(
|
|
52
|
-
(pl.col(ppw) * pl.col(player_rating_col)).sum().over([mid, tid])
|
|
53
|
-
/ pl.col(ppw).sum().over([mid, tid])
|
|
59
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
|
|
60
|
+
/ pl.col(weight_col).sum().over([mid, tid])
|
|
54
61
|
).alias(team_rating_out)
|
|
55
62
|
)
|
|
56
63
|
|
|
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
|
|
|
118
125
|
mid = column_names.match_id
|
|
119
126
|
ppw = column_names.projected_participation_weight
|
|
120
127
|
|
|
121
|
-
if ppw and ppw in df.columns:
|
|
128
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
129
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
130
|
+
|
|
131
|
+
if weight_col and weight_col in df.columns:
|
|
122
132
|
return df.with_columns(
|
|
123
133
|
(
|
|
124
|
-
(pl.col(ppw) * pl.col(player_rating_col)).sum().over(mid)
|
|
125
|
-
/ pl.col(ppw).sum().over(mid)
|
|
134
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
|
|
135
|
+
/ pl.col(weight_col).sum().over(mid)
|
|
126
136
|
).alias(rating_mean_out)
|
|
127
137
|
)
|
|
128
138
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.10
|
|
3
|
+
Version: 0.8.13
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -103,8 +103,10 @@ tests/hyperparameter_tuning/test_rating_tuner.py
|
|
|
103
103
|
tests/performance_transformers/test_performance_manager.py
|
|
104
104
|
tests/performance_transformers/test_performances_transformers.py
|
|
105
105
|
tests/ratings/test_player_rating_generator.py
|
|
106
|
+
tests/ratings/test_player_rating_no_mutation.py
|
|
106
107
|
tests/ratings/test_ratings_property.py
|
|
107
108
|
tests/ratings/test_team_rating_generator.py
|
|
109
|
+
tests/ratings/test_utils_scaled_weights.py
|
|
108
110
|
tests/scorer/test_score.py
|
|
109
111
|
tests/scorer/test_score_aggregation_granularity.py
|
|
110
112
|
tests/transformers/test_estimator_transformer_context.py
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Tests to ensure PlayerRatingGenerator does not mutate input columns."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from spforge import ColumnNames
|
|
7
|
+
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
|
|
11
|
+
def cn_with_projected():
|
|
12
|
+
"""ColumnNames with both participation_weight and projected_participation_weight."""
|
|
13
|
+
return ColumnNames(
|
|
14
|
+
player_id="pid",
|
|
15
|
+
team_id="tid",
|
|
16
|
+
match_id="mid",
|
|
17
|
+
start_date="dt",
|
|
18
|
+
update_match_id="mid",
|
|
19
|
+
participation_weight="minutes",
|
|
20
|
+
projected_participation_weight="minutes_prediction",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
|
|
25
|
+
def fit_df():
|
|
26
|
+
"""Training data with minutes > 1 (will trigger auto-scaling)."""
|
|
27
|
+
return pl.DataFrame(
|
|
28
|
+
{
|
|
29
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
30
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
31
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
32
|
+
"dt": ["2024-01-01"] * 4,
|
|
33
|
+
"perf": [0.6, 0.4, 0.7, 0.3],
|
|
34
|
+
"minutes": [30.0, 25.0, 32.0, 28.0],
|
|
35
|
+
"minutes_prediction": [28.0, 24.0, 30.0, 26.0],
|
|
36
|
+
}
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture
|
|
41
|
+
def future_df():
|
|
42
|
+
"""Future prediction data with minutes > 1 (will trigger auto-scaling)."""
|
|
43
|
+
return pl.DataFrame(
|
|
44
|
+
{
|
|
45
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
46
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
47
|
+
"mid": ["M2", "M2", "M2", "M2"],
|
|
48
|
+
"dt": ["2024-01-02"] * 4,
|
|
49
|
+
"minutes": [30.0, 25.0, 32.0, 28.0],
|
|
50
|
+
"minutes_prediction": [28.0, 24.0, 30.0, 26.0],
|
|
51
|
+
}
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_fit_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df):
|
|
56
|
+
"""fit_transform should not modify the participation_weight column values."""
|
|
57
|
+
# Join result with original to compare values by player_id
|
|
58
|
+
gen = PlayerRatingGenerator(
|
|
59
|
+
performance_column="perf",
|
|
60
|
+
column_names=cn_with_projected,
|
|
61
|
+
auto_scale_performance=True,
|
|
62
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
63
|
+
)
|
|
64
|
+
result = gen.fit_transform(fit_df)
|
|
65
|
+
|
|
66
|
+
# Check that each player's minutes value is preserved
|
|
67
|
+
original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes"].to_list()))
|
|
68
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
|
|
69
|
+
|
|
70
|
+
for pid, original_val in original_by_player.items():
|
|
71
|
+
result_val = result_by_player[pid]
|
|
72
|
+
assert result_val == original_val, (
|
|
73
|
+
f"participation_weight for player {pid} was mutated. "
|
|
74
|
+
f"Expected {original_val}, got {result_val}"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_fit_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df):
|
|
79
|
+
"""fit_transform should not modify the projected_participation_weight column values."""
|
|
80
|
+
gen = PlayerRatingGenerator(
|
|
81
|
+
performance_column="perf",
|
|
82
|
+
column_names=cn_with_projected,
|
|
83
|
+
auto_scale_performance=True,
|
|
84
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
85
|
+
)
|
|
86
|
+
result = gen.fit_transform(fit_df)
|
|
87
|
+
|
|
88
|
+
# Check that each player's minutes_prediction value is preserved
|
|
89
|
+
original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes_prediction"].to_list()))
|
|
90
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes_prediction"].to_list()))
|
|
91
|
+
|
|
92
|
+
for pid, original_val in original_by_player.items():
|
|
93
|
+
result_val = result_by_player[pid]
|
|
94
|
+
assert result_val == original_val, (
|
|
95
|
+
f"projected_participation_weight for player {pid} was mutated. "
|
|
96
|
+
f"Expected {original_val}, got {result_val}"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
|
|
101
|
+
"""transform should not modify the participation_weight column values."""
|
|
102
|
+
gen = PlayerRatingGenerator(
|
|
103
|
+
performance_column="perf",
|
|
104
|
+
column_names=cn_with_projected,
|
|
105
|
+
auto_scale_performance=True,
|
|
106
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
107
|
+
)
|
|
108
|
+
gen.fit_transform(fit_df)
|
|
109
|
+
|
|
110
|
+
result = gen.transform(future_df)
|
|
111
|
+
|
|
112
|
+
# Check that each player's minutes value is preserved
|
|
113
|
+
original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes"].to_list()))
|
|
114
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
|
|
115
|
+
|
|
116
|
+
for pid, original_val in original_by_player.items():
|
|
117
|
+
result_val = result_by_player[pid]
|
|
118
|
+
assert result_val == original_val, (
|
|
119
|
+
f"participation_weight for player {pid} was mutated during transform. "
|
|
120
|
+
f"Expected {original_val}, got {result_val}"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
|
|
125
|
+
"""transform should not modify the projected_participation_weight column values."""
|
|
126
|
+
gen = PlayerRatingGenerator(
|
|
127
|
+
performance_column="perf",
|
|
128
|
+
column_names=cn_with_projected,
|
|
129
|
+
auto_scale_performance=True,
|
|
130
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
131
|
+
)
|
|
132
|
+
gen.fit_transform(fit_df)
|
|
133
|
+
|
|
134
|
+
result = gen.transform(future_df)
|
|
135
|
+
|
|
136
|
+
# Check that each player's minutes_prediction value is preserved
|
|
137
|
+
original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list()))
|
|
138
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes_prediction"].to_list()))
|
|
139
|
+
|
|
140
|
+
for pid, original_val in original_by_player.items():
|
|
141
|
+
result_val = result_by_player[pid]
|
|
142
|
+
assert result_val == original_val, (
|
|
143
|
+
f"projected_participation_weight for player {pid} was mutated during transform. "
|
|
144
|
+
f"Expected {original_val}, got {result_val}"
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_future_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
|
|
149
|
+
"""future_transform should not modify the participation_weight column values."""
|
|
150
|
+
gen = PlayerRatingGenerator(
|
|
151
|
+
performance_column="perf",
|
|
152
|
+
column_names=cn_with_projected,
|
|
153
|
+
auto_scale_performance=True,
|
|
154
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
155
|
+
)
|
|
156
|
+
gen.fit_transform(fit_df)
|
|
157
|
+
|
|
158
|
+
original_minutes = future_df["minutes"].to_list()
|
|
159
|
+
result = gen.future_transform(future_df)
|
|
160
|
+
|
|
161
|
+
# The minutes column should have the same values as before
|
|
162
|
+
result_minutes = result["minutes"].to_list()
|
|
163
|
+
assert result_minutes == original_minutes, (
|
|
164
|
+
f"participation_weight column was mutated during future_transform. "
|
|
165
|
+
f"Expected {original_minutes}, got {result_minutes}"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def test_future_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
|
|
170
|
+
"""future_transform should not modify the projected_participation_weight column values."""
|
|
171
|
+
gen = PlayerRatingGenerator(
|
|
172
|
+
performance_column="perf",
|
|
173
|
+
column_names=cn_with_projected,
|
|
174
|
+
auto_scale_performance=True,
|
|
175
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
176
|
+
)
|
|
177
|
+
gen.fit_transform(fit_df)
|
|
178
|
+
|
|
179
|
+
original_minutes_pred = future_df["minutes_prediction"].to_list()
|
|
180
|
+
result = gen.future_transform(future_df)
|
|
181
|
+
|
|
182
|
+
# The minutes_prediction column should have the same values as before
|
|
183
|
+
result_minutes_pred = result["minutes_prediction"].to_list()
|
|
184
|
+
assert result_minutes_pred == original_minutes_pred, (
|
|
185
|
+
f"projected_participation_weight column was mutated during future_transform. "
|
|
186
|
+
f"Expected {original_minutes_pred}, got {result_minutes_pred}"
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_multiple_transforms_do_not_compound_scaling(cn_with_projected, fit_df, future_df):
|
|
191
|
+
"""Multiple transform calls should not compound the scaling effect."""
|
|
192
|
+
gen = PlayerRatingGenerator(
|
|
193
|
+
performance_column="perf",
|
|
194
|
+
column_names=cn_with_projected,
|
|
195
|
+
auto_scale_performance=True,
|
|
196
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
197
|
+
)
|
|
198
|
+
gen.fit_transform(fit_df)
|
|
199
|
+
|
|
200
|
+
# Call transform multiple times
|
|
201
|
+
result1 = gen.transform(future_df)
|
|
202
|
+
result2 = gen.transform(result1)
|
|
203
|
+
result3 = gen.transform(result2)
|
|
204
|
+
|
|
205
|
+
# After 3 transforms, each player's values should still be the same as original
|
|
206
|
+
original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list()))
|
|
207
|
+
final_by_player = dict(zip(result3["pid"].to_list(), result3["minutes_prediction"].to_list()))
|
|
208
|
+
|
|
209
|
+
for pid, original_val in original_by_player.items():
|
|
210
|
+
final_val = final_by_player[pid]
|
|
211
|
+
assert final_val == original_val, (
|
|
212
|
+
f"Multiple transforms compounded the scaling for player {pid}. "
|
|
213
|
+
f"Expected {original_val}, got {final_val}"
|
|
214
|
+
)
|