spforge 0.8.10__tar.gz → 0.8.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Further details were linked from the original page; the link target is not preserved in this text.

Files changed (119)
  1. {spforge-0.8.10/spforge.egg-info → spforge-0.8.13}/PKG-INFO +1 -1
  2. {spforge-0.8.10 → spforge-0.8.13}/pyproject.toml +1 -1
  3. {spforge-0.8.10 → spforge-0.8.13}/spforge/autopipeline.py +92 -0
  4. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_player_rating.py +68 -20
  5. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/utils.py +16 -6
  6. {spforge-0.8.10 → spforge-0.8.13/spforge.egg-info}/PKG-INFO +1 -1
  7. {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/SOURCES.txt +2 -0
  8. spforge-0.8.13/tests/ratings/test_player_rating_no_mutation.py +214 -0
  9. spforge-0.8.13/tests/ratings/test_utils_scaled_weights.py +136 -0
  10. {spforge-0.8.10 → spforge-0.8.13}/tests/test_autopipeline.py +141 -0
  11. {spforge-0.8.10 → spforge-0.8.13}/LICENSE +0 -0
  12. {spforge-0.8.10 → spforge-0.8.13}/MANIFEST.in +0 -0
  13. {spforge-0.8.10 → spforge-0.8.13}/README.md +0 -0
  14. {spforge-0.8.10 → spforge-0.8.13}/examples/__init__.py +0 -0
  15. {spforge-0.8.10 → spforge-0.8.13}/examples/game_level_example.py +0 -0
  16. {spforge-0.8.10 → spforge-0.8.13}/examples/lol/__init__.py +0 -0
  17. {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/__init__.py +0 -0
  18. {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/subsample_lol_data.parquet +0 -0
  19. {spforge-0.8.10 → spforge-0.8.13}/examples/lol/data/utils.py +0 -0
  20. {spforge-0.8.10 → spforge-0.8.13}/examples/lol/pipeline_transformer_example.py +0 -0
  21. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/__init__.py +0 -0
  22. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/cross_validation_example.py +0 -0
  23. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/__init__.py +0 -0
  24. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/game_player_subsample.parquet +0 -0
  25. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/data/utils.py +0 -0
  26. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/feature_engineering_example.py +0 -0
  27. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/game_winner_example.py +0 -0
  28. {spforge-0.8.10 → spforge-0.8.13}/examples/nba/predictor_transformers_example.py +0 -0
  29. {spforge-0.8.10 → spforge-0.8.13}/setup.cfg +0 -0
  30. {spforge-0.8.10 → spforge-0.8.13}/spforge/__init__.py +0 -0
  31. {spforge-0.8.10 → spforge-0.8.13}/spforge/base_feature_generator.py +0 -0
  32. {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/__init__.py +0 -0
  33. {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/_base.py +0 -0
  34. {spforge-0.8.10 → spforge-0.8.13}/spforge/cross_validator/cross_validator.py +0 -0
  35. {spforge-0.8.10 → spforge-0.8.13}/spforge/data_structures.py +0 -0
  36. {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/__init__.py +0 -0
  37. {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_negative_binomial_estimator.py +0 -0
  38. {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_normal_distribution_predictor.py +0 -0
  39. {spforge-0.8.10 → spforge-0.8.13}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
  40. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/__init__.py +0 -0
  41. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_conditional_estimator.py +0 -0
  42. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
  43. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_granularity_estimator.py +0 -0
  44. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_group_by_estimator.py +0 -0
  45. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_ordinal_classifier.py +0 -0
  46. {spforge-0.8.10 → spforge-0.8.13}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
  47. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/__init__.py +0 -0
  48. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_base.py +0 -0
  49. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_lag.py +0 -0
  50. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_net_over_predicted.py +0 -0
  51. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
  52. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
  53. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
  54. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_mean_days.py +0 -0
  55. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_rolling_window.py +0 -0
  56. {spforge-0.8.10 → spforge-0.8.13}/spforge/feature_generator/_utils.py +0 -0
  57. {spforge-0.8.10 → spforge-0.8.13}/spforge/features_generator_pipeline.py +0 -0
  58. {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/__init__.py +0 -0
  59. {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
  60. {spforge-0.8.10 → spforge-0.8.13}/spforge/hyperparameter_tuning/_tuner.py +0 -0
  61. {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/__init__.py +0 -0
  62. {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/_performance_manager.py +0 -0
  63. {spforge-0.8.10 → spforge-0.8.13}/spforge/performance_transformers/_performances_transformers.py +0 -0
  64. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/__init__.py +0 -0
  65. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_base.py +0 -0
  66. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/_team_rating.py +0 -0
  67. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/enums.py +0 -0
  68. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/league_identifier.py +0 -0
  69. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/league_start_rating_optimizer.py +0 -0
  70. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/player_performance_predictor.py +0 -0
  71. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/start_rating_generator.py +0 -0
  72. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/team_performance_predictor.py +0 -0
  73. {spforge-0.8.10 → spforge-0.8.13}/spforge/ratings/team_start_rating_generator.py +0 -0
  74. {spforge-0.8.10 → spforge-0.8.13}/spforge/scorer/__init__.py +0 -0
  75. {spforge-0.8.10 → spforge-0.8.13}/spforge/scorer/_score.py +0 -0
  76. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/__init__.py +0 -0
  77. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_base.py +0 -0
  78. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_net_over_predicted.py +0 -0
  79. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_operator.py +0 -0
  80. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_other_transformer.py +0 -0
  81. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_predictor.py +0 -0
  82. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_simple_transformer.py +0 -0
  83. {spforge-0.8.10 → spforge-0.8.13}/spforge/transformers/_team_ratio_predictor.py +0 -0
  84. {spforge-0.8.10 → spforge-0.8.13}/spforge/utils.py +0 -0
  85. {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/dependency_links.txt +0 -0
  86. {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/requires.txt +0 -0
  87. {spforge-0.8.10 → spforge-0.8.13}/spforge.egg-info/top_level.txt +0 -0
  88. {spforge-0.8.10 → spforge-0.8.13}/tests/cross_validator/test_cross_validator.py +0 -0
  89. {spforge-0.8.10 → spforge-0.8.13}/tests/distributions/test_distribution.py +0 -0
  90. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
  91. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
  92. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_lol_player_kills.py +0 -0
  93. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_player_points.py +0 -0
  94. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
  95. {spforge-0.8.10 → spforge-0.8.13}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
  96. {spforge-0.8.10 → spforge-0.8.13}/tests/estimator/test_sklearn_estimator.py +0 -0
  97. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_lag.py +0 -0
  98. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
  99. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
  100. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
  101. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_mean_days.py +0 -0
  102. {spforge-0.8.10 → spforge-0.8.13}/tests/feature_generator/test_rolling_window.py +0 -0
  103. {spforge-0.8.10 → spforge-0.8.13}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
  104. {spforge-0.8.10 → spforge-0.8.13}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
  105. {spforge-0.8.10 → spforge-0.8.13}/tests/performance_transformers/test_performance_manager.py +0 -0
  106. {spforge-0.8.10 → spforge-0.8.13}/tests/performance_transformers/test_performances_transformers.py +0 -0
  107. {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_player_rating_generator.py +0 -0
  108. {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_ratings_property.py +0 -0
  109. {spforge-0.8.10 → spforge-0.8.13}/tests/ratings/test_team_rating_generator.py +0 -0
  110. {spforge-0.8.10 → spforge-0.8.13}/tests/scorer/test_score.py +0 -0
  111. {spforge-0.8.10 → spforge-0.8.13}/tests/scorer/test_score_aggregation_granularity.py +0 -0
  112. {spforge-0.8.10 → spforge-0.8.13}/tests/test_autopipeline_context.py +0 -0
  113. {spforge-0.8.10 → spforge-0.8.13}/tests/test_feature_generator_pipeline.py +0 -0
  114. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_estimator_transformer_context.py +0 -0
  115. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_net_over_predicted.py +0 -0
  116. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_other_transformer.py +0 -0
  117. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_predictor_transformer.py +0 -0
  118. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_simple_transformer.py +0 -0
  119. {spforge-0.8.10 → spforge-0.8.13}/tests/transformers/test_team_ratio_predictor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.10
3
+ Version: 0.8.13
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spforge"
7
- version = "0.8.10"
7
+ version = "0.8.13"
8
8
  description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
195
195
  return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))
196
196
 
197
197
 
198
+ def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
199
+ """Recursively find innermost estimator with feature_importances_ or coef_."""
200
+ if hasattr(estimator, "feature_importances_"):
201
+ inner = _get_importance_estimator_inner(estimator)
202
+ if inner is not None:
203
+ return inner
204
+ return (estimator, "feature_importances_")
205
+
206
+ if hasattr(estimator, "coef_"):
207
+ inner = _get_importance_estimator_inner(estimator)
208
+ if inner is not None:
209
+ return inner
210
+ return (estimator, "coef_")
211
+
212
+ return _get_importance_estimator_inner(estimator)
213
+
214
+
215
+ def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
216
+ """Check wrapped estimators for importance attributes."""
217
+ # Check estimator_ (sklearn fitted wrapper convention)
218
+ if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
219
+ result = _get_importance_estimator(estimator.estimator_)
220
+ if result is not None:
221
+ return result
222
+
223
+ # Check _est (GroupByEstimator convention)
224
+ if hasattr(estimator, "_est") and estimator._est is not None:
225
+ result = _get_importance_estimator(estimator._est)
226
+ if result is not None:
227
+ return result
228
+
229
+ return None
230
+
231
+
198
232
  class AutoPipeline(BaseEstimator):
199
233
  def __init__(
200
234
  self,
@@ -627,3 +661,61 @@ class AutoPipeline(BaseEstimator):
627
661
  all_features.append(ctx)
628
662
 
629
663
  return all_features
664
+
665
+ def _get_estimator_feature_names(self) -> list[str]:
666
+ """Get feature names as seen by the final estimator after all transformations."""
667
+ pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
668
+
669
+ # Remove context columns dropped by "final" step
670
+ final_step = self.sklearn_pipeline.named_steps["final"]
671
+ drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
672
+ features = [f for f in pre_out if f not in drop_cols]
673
+
674
+ # Remove granularity columns (dropped by GroupByEstimator)
675
+ granularity_set = set(self.granularity)
676
+ features = [f for f in features if f not in granularity_set]
677
+
678
+ # Remove context features (used by wrapper estimators, not inner model)
679
+ context_set = set(self.context_feature_names)
680
+ features = [f for f in features if f not in context_set]
681
+
682
+ return features
683
+
684
+ @property
685
+ def feature_importances_(self) -> pd.DataFrame:
686
+ """Get feature importances from the fitted estimator.
687
+
688
+ Returns a DataFrame with columns ["feature", "importance"] sorted by
689
+ absolute importance descending. Works with tree-based models
690
+ (feature_importances_) and linear models (coef_).
691
+ """
692
+ if self.sklearn_pipeline is None:
693
+ raise RuntimeError("Pipeline not fitted. Call fit() first.")
694
+
695
+ est = self.sklearn_pipeline.named_steps["est"]
696
+ result = _get_importance_estimator(est)
697
+
698
+ if result is None:
699
+ raise RuntimeError(
700
+ "Estimator does not support feature importances. "
701
+ "Requires feature_importances_ or coef_ attribute."
702
+ )
703
+
704
+ inner_est, attr_name = result
705
+ raw = getattr(inner_est, attr_name)
706
+
707
+ if attr_name == "coef_":
708
+ # Linear models: use absolute value of coefficients
709
+ if raw.ndim == 2:
710
+ # Multi-class: average absolute values across classes
711
+ importances = np.abs(raw).mean(axis=0)
712
+ else:
713
+ importances = np.abs(raw)
714
+ else:
715
+ importances = raw
716
+
717
+ feature_names = self._get_estimator_feature_names()
718
+
719
+ df = pd.DataFrame({"feature": feature_names, "importance": importances})
720
+ df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
721
+ return df
@@ -34,6 +34,8 @@ from spforge.ratings.utils import (
34
34
  from spforge.feature_generator._utils import to_polars
35
35
 
36
36
  PLAYER_STATS = "__PLAYER_STATS"
37
+ _SCALED_PW = "__scaled_participation_weight__"
38
+ _SCALED_PPW = "__scaled_projected_participation_weight__"
37
39
 
38
40
 
39
41
  class PlayerRatingGenerator(RatingGenerator):
@@ -273,6 +275,7 @@ class PlayerRatingGenerator(RatingGenerator):
273
275
  self._projected_participation_weight_max = self._participation_weight_max
274
276
 
275
277
  def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
278
+ """Create internal scaled participation weight columns without mutating originals."""
276
279
  if not self.scale_participation_weights:
277
280
  return df
278
281
  if self._participation_weight_max is None or self._participation_weight_max <= 0:
@@ -287,7 +290,7 @@ class PlayerRatingGenerator(RatingGenerator):
287
290
  df = df.with_columns(
288
291
  (pl.col(cn.participation_weight) / denom)
289
292
  .clip(0.0, 1.0)
290
- .alias(cn.participation_weight)
293
+ .alias(_SCALED_PW)
291
294
  )
292
295
 
293
296
  if (
@@ -300,16 +303,38 @@ class PlayerRatingGenerator(RatingGenerator):
300
303
  df = df.with_columns(
301
304
  (pl.col(cn.projected_participation_weight) / denom)
302
305
  .clip(0.0, 1.0)
303
- .alias(cn.projected_participation_weight)
306
+ .alias(_SCALED_PPW)
304
307
  )
305
308
 
306
309
  return df
307
310
 
311
+ def _get_participation_weight_col(self) -> str:
312
+ """Get the column name to use for participation weight (scaled if available)."""
313
+ cn = self.column_names
314
+ if self.scale_participation_weights and cn and cn.participation_weight:
315
+ return _SCALED_PW
316
+ return cn.participation_weight if cn else ""
317
+
318
+ def _get_projected_participation_weight_col(self) -> str:
319
+ """Get the column name to use for projected participation weight (scaled if available)."""
320
+ cn = self.column_names
321
+ if self.scale_participation_weights and cn and cn.projected_participation_weight:
322
+ return _SCALED_PPW
323
+ return cn.projected_participation_weight if cn else ""
324
+
325
+ def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
326
+ """Remove internal scaled columns before returning."""
327
+ cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
328
+ if cols_to_drop:
329
+ df = df.drop(cols_to_drop)
330
+ return df
331
+
308
332
  def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
309
333
  df = self._scale_participation_weight_columns(df)
310
334
  match_df = self._create_match_df(df)
311
335
  ratings = self._calculate_ratings(match_df)
312
336
 
337
+ # Keep scaled columns for now - they're needed by _add_rating_features
313
338
  cols = [
314
339
  c
315
340
  for c in df.columns
@@ -329,13 +354,15 @@ class PlayerRatingGenerator(RatingGenerator):
329
354
  on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
330
355
  )
331
356
 
332
- return self._add_rating_features(df)
357
+ result = self._add_rating_features(df)
358
+ return self._remove_internal_scaled_columns(result)
333
359
 
334
360
  def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
335
361
  df = self._scale_participation_weight_columns(df)
336
362
  match_df = self._create_match_df(df)
337
363
  ratings = self._calculate_future_ratings(match_df)
338
364
 
365
+ # Keep scaled columns for now - they're needed by _add_rating_features
339
366
  cols = [
340
367
  c
341
368
  for c in df.columns
@@ -360,7 +387,8 @@ class PlayerRatingGenerator(RatingGenerator):
360
387
  how="left",
361
388
  )
362
389
 
363
- return self._add_rating_features(df_with_ratings)
390
+ result = self._add_rating_features(df_with_ratings)
391
+ return self._remove_internal_scaled_columns(result)
364
392
 
365
393
  def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
366
394
  cn = self.column_names
@@ -796,9 +824,13 @@ class PlayerRatingGenerator(RatingGenerator):
796
824
 
797
825
  if cn.participation_weight and cn.participation_weight in df.columns:
798
826
  player_stat_cols.append(cn.participation_weight)
827
+ if _SCALED_PW in df.columns:
828
+ player_stat_cols.append(_SCALED_PW)
799
829
 
800
830
  if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
801
831
  player_stat_cols.append(cn.projected_participation_weight)
832
+ if _SCALED_PPW in df.columns:
833
+ player_stat_cols.append(_SCALED_PPW)
802
834
 
803
835
  if cn.position and cn.position in df.columns:
804
836
  player_stat_cols.append(cn.position)
@@ -854,14 +886,23 @@ class PlayerRatingGenerator(RatingGenerator):
854
886
  position = team_player.get(cn.position)
855
887
  player_league = team_player.get(cn.league, None)
856
888
 
857
- participation_weight = (
858
- team_player.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
859
- )
860
- projected_participation_weight = (
861
- team_player.get(cn.projected_participation_weight, participation_weight)
862
- if cn.projected_participation_weight
863
- else participation_weight
864
- )
889
+ # Use scaled participation weight if available, otherwise use original
890
+ if _SCALED_PW in team_player:
891
+ participation_weight = team_player.get(_SCALED_PW, 1.0)
892
+ elif cn.participation_weight:
893
+ participation_weight = team_player.get(cn.participation_weight, 1.0)
894
+ else:
895
+ participation_weight = 1.0
896
+
897
+ # Use scaled projected participation weight if available, otherwise use original
898
+ if _SCALED_PPW in team_player:
899
+ projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
900
+ elif cn.projected_participation_weight:
901
+ projected_participation_weight = team_player.get(
902
+ cn.projected_participation_weight, participation_weight
903
+ )
904
+ else:
905
+ projected_participation_weight = participation_weight
865
906
  projected_participation_weights.append(projected_participation_weight)
866
907
 
867
908
  perf_val = (
@@ -1087,14 +1128,21 @@ class PlayerRatingGenerator(RatingGenerator):
1087
1128
  position = tp.get(cn.position)
1088
1129
  league = tp.get(cn.league, None)
1089
1130
 
1090
- pw = (
1091
- tp.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
1092
- )
1093
- ppw = (
1094
- tp.get(cn.projected_participation_weight, pw)
1095
- if cn.projected_participation_weight
1096
- else pw
1097
- )
1131
+ # Use scaled participation weight if available, otherwise use original
1132
+ if _SCALED_PW in tp:
1133
+ pw = tp.get(_SCALED_PW, 1.0)
1134
+ elif cn.participation_weight:
1135
+ pw = tp.get(cn.participation_weight, 1.0)
1136
+ else:
1137
+ pw = 1.0
1138
+
1139
+ # Use scaled projected participation weight if available, otherwise use original
1140
+ if _SCALED_PPW in tp:
1141
+ ppw = tp.get(_SCALED_PPW, pw)
1142
+ elif cn.projected_participation_weight:
1143
+ ppw = tp.get(cn.projected_participation_weight, pw)
1144
+ else:
1145
+ ppw = pw
1098
1146
  proj_w.append(float(ppw))
1099
1147
 
1100
1148
  mp = MatchPerformance(
@@ -2,6 +2,10 @@ import polars as pl
2
2
 
3
3
  from spforge.data_structures import ColumnNames
4
4
 
5
+ # Internal column names for scaled participation weights
6
+ _SCALED_PW = "__scaled_participation_weight__"
7
+ _SCALED_PPW = "__scaled_projected_participation_weight__"
8
+
5
9
 
6
10
  def add_team_rating(
7
11
  df: pl.DataFrame,
@@ -46,11 +50,14 @@ def add_team_rating_projected(
46
50
  tid = column_names.team_id
47
51
  ppw = column_names.projected_participation_weight
48
52
 
49
- if ppw:
53
+ # Use scaled column if available (clipped to [0, 1]), otherwise raw column
54
+ weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
55
+
56
+ if weight_col and weight_col in df.columns:
50
57
  return df.with_columns(
51
58
  (
52
- (pl.col(ppw) * pl.col(player_rating_col)).sum().over([mid, tid])
53
- / pl.col(ppw).sum().over([mid, tid])
59
+ (pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
60
+ / pl.col(weight_col).sum().over([mid, tid])
54
61
  ).alias(team_rating_out)
55
62
  )
56
63
 
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
118
125
  mid = column_names.match_id
119
126
  ppw = column_names.projected_participation_weight
120
127
 
121
- if ppw:
128
+ # Use scaled column if available (clipped to [0, 1]), otherwise raw column
129
+ weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
130
+
131
+ if weight_col and weight_col in df.columns:
122
132
  return df.with_columns(
123
133
  (
124
- (pl.col(ppw) * pl.col(player_rating_col)).sum().over(mid)
125
- / pl.col(ppw).sum().over(mid)
134
+ (pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
135
+ / pl.col(weight_col).sum().over(mid)
126
136
  ).alias(rating_mean_out)
127
137
  )
128
138
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.10
3
+ Version: 0.8.13
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -103,8 +103,10 @@ tests/hyperparameter_tuning/test_rating_tuner.py
103
103
  tests/performance_transformers/test_performance_manager.py
104
104
  tests/performance_transformers/test_performances_transformers.py
105
105
  tests/ratings/test_player_rating_generator.py
106
+ tests/ratings/test_player_rating_no_mutation.py
106
107
  tests/ratings/test_ratings_property.py
107
108
  tests/ratings/test_team_rating_generator.py
109
+ tests/ratings/test_utils_scaled_weights.py
108
110
  tests/scorer/test_score.py
109
111
  tests/scorer/test_score_aggregation_granularity.py
110
112
  tests/transformers/test_estimator_transformer_context.py
@@ -0,0 +1,214 @@
1
+ """Tests to ensure PlayerRatingGenerator does not mutate input columns."""
2
+
3
+ import polars as pl
4
+ import pytest
5
+
6
+ from spforge import ColumnNames
7
+ from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
8
+
9
+
10
+ @pytest.fixture
11
+ def cn_with_projected():
12
+ """ColumnNames with both participation_weight and projected_participation_weight."""
13
+ return ColumnNames(
14
+ player_id="pid",
15
+ team_id="tid",
16
+ match_id="mid",
17
+ start_date="dt",
18
+ update_match_id="mid",
19
+ participation_weight="minutes",
20
+ projected_participation_weight="minutes_prediction",
21
+ )
22
+
23
+
24
+ @pytest.fixture
25
+ def fit_df():
26
+ """Training data with minutes > 1 (will trigger auto-scaling)."""
27
+ return pl.DataFrame(
28
+ {
29
+ "pid": ["P1", "P2", "P3", "P4"],
30
+ "tid": ["T1", "T1", "T2", "T2"],
31
+ "mid": ["M1", "M1", "M1", "M1"],
32
+ "dt": ["2024-01-01"] * 4,
33
+ "perf": [0.6, 0.4, 0.7, 0.3],
34
+ "minutes": [30.0, 25.0, 32.0, 28.0],
35
+ "minutes_prediction": [28.0, 24.0, 30.0, 26.0],
36
+ }
37
+ )
38
+
39
+
40
+ @pytest.fixture
41
+ def future_df():
42
+ """Future prediction data with minutes > 1 (will trigger auto-scaling)."""
43
+ return pl.DataFrame(
44
+ {
45
+ "pid": ["P1", "P2", "P3", "P4"],
46
+ "tid": ["T1", "T1", "T2", "T2"],
47
+ "mid": ["M2", "M2", "M2", "M2"],
48
+ "dt": ["2024-01-02"] * 4,
49
+ "minutes": [30.0, 25.0, 32.0, 28.0],
50
+ "minutes_prediction": [28.0, 24.0, 30.0, 26.0],
51
+ }
52
+ )
53
+
54
+
55
+ def test_fit_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df):
56
+ """fit_transform should not modify the participation_weight column values."""
57
+ # Join result with original to compare values by player_id
58
+ gen = PlayerRatingGenerator(
59
+ performance_column="perf",
60
+ column_names=cn_with_projected,
61
+ auto_scale_performance=True,
62
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
63
+ )
64
+ result = gen.fit_transform(fit_df)
65
+
66
+ # Check that each player's minutes value is preserved
67
+ original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes"].to_list()))
68
+ result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
69
+
70
+ for pid, original_val in original_by_player.items():
71
+ result_val = result_by_player[pid]
72
+ assert result_val == original_val, (
73
+ f"participation_weight for player {pid} was mutated. "
74
+ f"Expected {original_val}, got {result_val}"
75
+ )
76
+
77
+
78
+ def test_fit_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df):
79
+ """fit_transform should not modify the projected_participation_weight column values."""
80
+ gen = PlayerRatingGenerator(
81
+ performance_column="perf",
82
+ column_names=cn_with_projected,
83
+ auto_scale_performance=True,
84
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
85
+ )
86
+ result = gen.fit_transform(fit_df)
87
+
88
+ # Check that each player's minutes_prediction value is preserved
89
+ original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes_prediction"].to_list()))
90
+ result_by_player = dict(zip(result["pid"].to_list(), result["minutes_prediction"].to_list()))
91
+
92
+ for pid, original_val in original_by_player.items():
93
+ result_val = result_by_player[pid]
94
+ assert result_val == original_val, (
95
+ f"projected_participation_weight for player {pid} was mutated. "
96
+ f"Expected {original_val}, got {result_val}"
97
+ )
98
+
99
+
100
+ def test_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
101
+ """transform should not modify the participation_weight column values."""
102
+ gen = PlayerRatingGenerator(
103
+ performance_column="perf",
104
+ column_names=cn_with_projected,
105
+ auto_scale_performance=True,
106
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
107
+ )
108
+ gen.fit_transform(fit_df)
109
+
110
+ result = gen.transform(future_df)
111
+
112
+ # Check that each player's minutes value is preserved
113
+ original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes"].to_list()))
114
+ result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
115
+
116
+ for pid, original_val in original_by_player.items():
117
+ result_val = result_by_player[pid]
118
+ assert result_val == original_val, (
119
+ f"participation_weight for player {pid} was mutated during transform. "
120
+ f"Expected {original_val}, got {result_val}"
121
+ )
122
+
123
+
124
+ def test_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
125
+ """transform should not modify the projected_participation_weight column values."""
126
+ gen = PlayerRatingGenerator(
127
+ performance_column="perf",
128
+ column_names=cn_with_projected,
129
+ auto_scale_performance=True,
130
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
131
+ )
132
+ gen.fit_transform(fit_df)
133
+
134
+ result = gen.transform(future_df)
135
+
136
+ # Check that each player's minutes_prediction value is preserved
137
+ original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list()))
138
+ result_by_player = dict(zip(result["pid"].to_list(), result["minutes_prediction"].to_list()))
139
+
140
+ for pid, original_val in original_by_player.items():
141
+ result_val = result_by_player[pid]
142
+ assert result_val == original_val, (
143
+ f"projected_participation_weight for player {pid} was mutated during transform. "
144
+ f"Expected {original_val}, got {result_val}"
145
+ )
146
+
147
+
148
+ def test_future_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
149
+ """future_transform should not modify the participation_weight column values."""
150
+ gen = PlayerRatingGenerator(
151
+ performance_column="perf",
152
+ column_names=cn_with_projected,
153
+ auto_scale_performance=True,
154
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
155
+ )
156
+ gen.fit_transform(fit_df)
157
+
158
+ original_minutes = future_df["minutes"].to_list()
159
+ result = gen.future_transform(future_df)
160
+
161
+ # The minutes column should have the same values as before
162
+ result_minutes = result["minutes"].to_list()
163
+ assert result_minutes == original_minutes, (
164
+ f"participation_weight column was mutated during future_transform. "
165
+ f"Expected {original_minutes}, got {result_minutes}"
166
+ )
167
+
168
+
169
+ def test_future_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
170
+ """future_transform should not modify the projected_participation_weight column values."""
171
+ gen = PlayerRatingGenerator(
172
+ performance_column="perf",
173
+ column_names=cn_with_projected,
174
+ auto_scale_performance=True,
175
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
176
+ )
177
+ gen.fit_transform(fit_df)
178
+
179
+ original_minutes_pred = future_df["minutes_prediction"].to_list()
180
+ result = gen.future_transform(future_df)
181
+
182
+ # The minutes_prediction column should have the same values as before
183
+ result_minutes_pred = result["minutes_prediction"].to_list()
184
+ assert result_minutes_pred == original_minutes_pred, (
185
+ f"projected_participation_weight column was mutated during future_transform. "
186
+ f"Expected {original_minutes_pred}, got {result_minutes_pred}"
187
+ )
188
+
189
+
190
+ def test_multiple_transforms_do_not_compound_scaling(cn_with_projected, fit_df, future_df):
191
+ """Multiple transform calls should not compound the scaling effect."""
192
+ gen = PlayerRatingGenerator(
193
+ performance_column="perf",
194
+ column_names=cn_with_projected,
195
+ auto_scale_performance=True,
196
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
197
+ )
198
+ gen.fit_transform(fit_df)
199
+
200
+ # Call transform multiple times
201
+ result1 = gen.transform(future_df)
202
+ result2 = gen.transform(result1)
203
+ result3 = gen.transform(result2)
204
+
205
+ # After 3 transforms, each player's values should still be the same as original
206
+ original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list()))
207
+ final_by_player = dict(zip(result3["pid"].to_list(), result3["minutes_prediction"].to_list()))
208
+
209
+ for pid, original_val in original_by_player.items():
210
+ final_val = final_by_player[pid]
211
+ assert final_val == original_val, (
212
+ f"Multiple transforms compounded the scaling for player {pid}. "
213
+ f"Expected {original_val}, got {final_val}"
214
+ )