spforge 0.8.9__tar.gz → 0.8.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spforge-0.8.9/spforge.egg-info → spforge-0.8.11}/PKG-INFO +1 -1
- {spforge-0.8.9 → spforge-0.8.11}/pyproject.toml +1 -1
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/_player_rating.py +101 -20
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/utils.py +16 -6
- {spforge-0.8.9 → spforge-0.8.11/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.9 → spforge-0.8.11}/spforge.egg-info/SOURCES.txt +2 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/ratings/test_player_rating_generator.py +57 -0
- spforge-0.8.11/tests/ratings/test_player_rating_no_mutation.py +214 -0
- spforge-0.8.11/tests/ratings/test_utils_scaled_weights.py +136 -0
- {spforge-0.8.9 → spforge-0.8.11}/LICENSE +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/MANIFEST.in +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/README.md +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/game_level_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/lol/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/setup.cfg +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/autopipeline.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/data_structures.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_group_by_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/scorer/_score.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_other_transformer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge/utils.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/test_autopipeline.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.9 → spforge-0.8.11}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.9
|
|
3
|
+
Version: 0.8.11
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.9"
|
|
7
|
+
version = "0.8.11"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
import copy
|
|
5
5
|
import math
|
|
6
|
+
import logging
|
|
6
7
|
from typing import Any, Literal
|
|
7
8
|
|
|
8
9
|
import narwhals.stable.v2 as nw
|
|
@@ -33,6 +34,8 @@ from spforge.ratings.utils import (
|
|
|
33
34
|
from spforge.feature_generator._utils import to_polars
|
|
34
35
|
|
|
35
36
|
PLAYER_STATS = "__PLAYER_STATS"
|
|
37
|
+
_SCALED_PW = "__scaled_participation_weight__"
|
|
38
|
+
_SCALED_PPW = "__scaled_projected_participation_weight__"
|
|
36
39
|
|
|
37
40
|
|
|
38
41
|
class PlayerRatingGenerator(RatingGenerator):
|
|
@@ -81,6 +84,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
81
84
|
column_names: ColumnNames | None = None,
|
|
82
85
|
output_suffix: str | None = None,
|
|
83
86
|
scale_participation_weights: bool = False,
|
|
87
|
+
auto_scale_participation_weights: bool = True,
|
|
84
88
|
**kwargs: Any,
|
|
85
89
|
):
|
|
86
90
|
super().__init__(
|
|
@@ -164,6 +168,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
164
168
|
|
|
165
169
|
self.use_off_def_split = bool(use_off_def_split)
|
|
166
170
|
self.scale_participation_weights = bool(scale_participation_weights)
|
|
171
|
+
self.auto_scale_participation_weights = bool(auto_scale_participation_weights)
|
|
167
172
|
self._participation_weight_max: float | None = None
|
|
168
173
|
self._projected_participation_weight_max: float | None = None
|
|
169
174
|
|
|
@@ -189,9 +194,39 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
189
194
|
column_names: ColumnNames | None = None,
|
|
190
195
|
) -> DataFrame | IntoFrameT:
|
|
191
196
|
self.column_names = column_names if column_names else self.column_names
|
|
197
|
+
self._maybe_enable_participation_weight_scaling(df)
|
|
192
198
|
self._set_participation_weight_max(df)
|
|
193
199
|
return super().fit_transform(df, column_names)
|
|
194
200
|
|
|
201
|
+
def _maybe_enable_participation_weight_scaling(self, df: DataFrame) -> None:
|
|
202
|
+
if self.scale_participation_weights or not self.auto_scale_participation_weights:
|
|
203
|
+
return
|
|
204
|
+
cn = self.column_names
|
|
205
|
+
if not cn:
|
|
206
|
+
return
|
|
207
|
+
|
|
208
|
+
pl_df = df.to_native() if df.implementation.is_polars() else df.to_polars().to_native()
|
|
209
|
+
|
|
210
|
+
def _out_of_bounds(col_name: str | None) -> bool:
|
|
211
|
+
if not col_name or col_name not in df.columns:
|
|
212
|
+
return False
|
|
213
|
+
col = pl_df[col_name]
|
|
214
|
+
min_val = col.min()
|
|
215
|
+
max_val = col.max()
|
|
216
|
+
if min_val is None or max_val is None:
|
|
217
|
+
return False
|
|
218
|
+
eps = 1e-6
|
|
219
|
+
return min_val < -eps or max_val > (1.0 + eps)
|
|
220
|
+
|
|
221
|
+
if _out_of_bounds(cn.participation_weight) or _out_of_bounds(
|
|
222
|
+
cn.projected_participation_weight
|
|
223
|
+
):
|
|
224
|
+
self.scale_participation_weights = True
|
|
225
|
+
logging.warning(
|
|
226
|
+
"Auto-scaling participation weights because values exceed [0, 1]. "
|
|
227
|
+
"Set scale_participation_weights=True explicitly to silence this warning."
|
|
228
|
+
)
|
|
229
|
+
|
|
195
230
|
def _ensure_player_off(self, player_id: str) -> PlayerRating:
|
|
196
231
|
if player_id not in self._player_off_ratings:
|
|
197
232
|
# create with start generator later; initialize to 0 now; overwritten when needed
|
|
@@ -240,6 +275,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
240
275
|
self._projected_participation_weight_max = self._participation_weight_max
|
|
241
276
|
|
|
242
277
|
def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
278
|
+
"""Create internal scaled participation weight columns without mutating originals."""
|
|
243
279
|
if not self.scale_participation_weights:
|
|
244
280
|
return df
|
|
245
281
|
if self._participation_weight_max is None or self._participation_weight_max <= 0:
|
|
@@ -254,7 +290,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
254
290
|
df = df.with_columns(
|
|
255
291
|
(pl.col(cn.participation_weight) / denom)
|
|
256
292
|
.clip(0.0, 1.0)
|
|
257
|
-
.alias(cn.participation_weight)
|
|
293
|
+
.alias(_SCALED_PW)
|
|
258
294
|
)
|
|
259
295
|
|
|
260
296
|
if (
|
|
@@ -267,16 +303,38 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
267
303
|
df = df.with_columns(
|
|
268
304
|
(pl.col(cn.projected_participation_weight) / denom)
|
|
269
305
|
.clip(0.0, 1.0)
|
|
270
|
-
.alias(cn.projected_participation_weight)
|
|
306
|
+
.alias(_SCALED_PPW)
|
|
271
307
|
)
|
|
272
308
|
|
|
273
309
|
return df
|
|
274
310
|
|
|
311
|
+
def _get_participation_weight_col(self) -> str:
|
|
312
|
+
"""Get the column name to use for participation weight (scaled if available)."""
|
|
313
|
+
cn = self.column_names
|
|
314
|
+
if self.scale_participation_weights and cn and cn.participation_weight:
|
|
315
|
+
return _SCALED_PW
|
|
316
|
+
return cn.participation_weight if cn else ""
|
|
317
|
+
|
|
318
|
+
def _get_projected_participation_weight_col(self) -> str:
|
|
319
|
+
"""Get the column name to use for projected participation weight (scaled if available)."""
|
|
320
|
+
cn = self.column_names
|
|
321
|
+
if self.scale_participation_weights and cn and cn.projected_participation_weight:
|
|
322
|
+
return _SCALED_PPW
|
|
323
|
+
return cn.projected_participation_weight if cn else ""
|
|
324
|
+
|
|
325
|
+
def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
326
|
+
"""Remove internal scaled columns before returning."""
|
|
327
|
+
cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
|
|
328
|
+
if cols_to_drop:
|
|
329
|
+
df = df.drop(cols_to_drop)
|
|
330
|
+
return df
|
|
331
|
+
|
|
275
332
|
def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
276
333
|
df = self._scale_participation_weight_columns(df)
|
|
277
334
|
match_df = self._create_match_df(df)
|
|
278
335
|
ratings = self._calculate_ratings(match_df)
|
|
279
336
|
|
|
337
|
+
# Keep scaled columns for now - they're needed by _add_rating_features
|
|
280
338
|
cols = [
|
|
281
339
|
c
|
|
282
340
|
for c in df.columns
|
|
@@ -296,13 +354,15 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
296
354
|
on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
|
|
297
355
|
)
|
|
298
356
|
|
|
299
|
-
|
|
357
|
+
result = self._add_rating_features(df)
|
|
358
|
+
return self._remove_internal_scaled_columns(result)
|
|
300
359
|
|
|
301
360
|
def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
302
361
|
df = self._scale_participation_weight_columns(df)
|
|
303
362
|
match_df = self._create_match_df(df)
|
|
304
363
|
ratings = self._calculate_future_ratings(match_df)
|
|
305
364
|
|
|
365
|
+
# Keep scaled columns for now - they're needed by _add_rating_features
|
|
306
366
|
cols = [
|
|
307
367
|
c
|
|
308
368
|
for c in df.columns
|
|
@@ -327,7 +387,8 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
327
387
|
how="left",
|
|
328
388
|
)
|
|
329
389
|
|
|
330
|
-
|
|
390
|
+
result = self._add_rating_features(df_with_ratings)
|
|
391
|
+
return self._remove_internal_scaled_columns(result)
|
|
331
392
|
|
|
332
393
|
def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
|
|
333
394
|
cn = self.column_names
|
|
@@ -763,9 +824,13 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
763
824
|
|
|
764
825
|
if cn.participation_weight and cn.participation_weight in df.columns:
|
|
765
826
|
player_stat_cols.append(cn.participation_weight)
|
|
827
|
+
if _SCALED_PW in df.columns:
|
|
828
|
+
player_stat_cols.append(_SCALED_PW)
|
|
766
829
|
|
|
767
830
|
if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
|
|
768
831
|
player_stat_cols.append(cn.projected_participation_weight)
|
|
832
|
+
if _SCALED_PPW in df.columns:
|
|
833
|
+
player_stat_cols.append(_SCALED_PPW)
|
|
769
834
|
|
|
770
835
|
if cn.position and cn.position in df.columns:
|
|
771
836
|
player_stat_cols.append(cn.position)
|
|
@@ -821,14 +886,23 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
821
886
|
position = team_player.get(cn.position)
|
|
822
887
|
player_league = team_player.get(cn.league, None)
|
|
823
888
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
team_player.get(cn.
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
889
|
+
# Use scaled participation weight if available, otherwise use original
|
|
890
|
+
if _SCALED_PW in team_player:
|
|
891
|
+
participation_weight = team_player.get(_SCALED_PW, 1.0)
|
|
892
|
+
elif cn.participation_weight:
|
|
893
|
+
participation_weight = team_player.get(cn.participation_weight, 1.0)
|
|
894
|
+
else:
|
|
895
|
+
participation_weight = 1.0
|
|
896
|
+
|
|
897
|
+
# Use scaled projected participation weight if available, otherwise use original
|
|
898
|
+
if _SCALED_PPW in team_player:
|
|
899
|
+
projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
|
|
900
|
+
elif cn.projected_participation_weight:
|
|
901
|
+
projected_participation_weight = team_player.get(
|
|
902
|
+
cn.projected_participation_weight, participation_weight
|
|
903
|
+
)
|
|
904
|
+
else:
|
|
905
|
+
projected_participation_weight = participation_weight
|
|
832
906
|
projected_participation_weights.append(projected_participation_weight)
|
|
833
907
|
|
|
834
908
|
perf_val = (
|
|
@@ -1054,14 +1128,21 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1054
1128
|
position = tp.get(cn.position)
|
|
1055
1129
|
league = tp.get(cn.league, None)
|
|
1056
1130
|
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
tp.get(cn.
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1131
|
+
# Use scaled participation weight if available, otherwise use original
|
|
1132
|
+
if _SCALED_PW in tp:
|
|
1133
|
+
pw = tp.get(_SCALED_PW, 1.0)
|
|
1134
|
+
elif cn.participation_weight:
|
|
1135
|
+
pw = tp.get(cn.participation_weight, 1.0)
|
|
1136
|
+
else:
|
|
1137
|
+
pw = 1.0
|
|
1138
|
+
|
|
1139
|
+
# Use scaled projected participation weight if available, otherwise use original
|
|
1140
|
+
if _SCALED_PPW in tp:
|
|
1141
|
+
ppw = tp.get(_SCALED_PPW, pw)
|
|
1142
|
+
elif cn.projected_participation_weight:
|
|
1143
|
+
ppw = tp.get(cn.projected_participation_weight, pw)
|
|
1144
|
+
else:
|
|
1145
|
+
ppw = pw
|
|
1065
1146
|
proj_w.append(float(ppw))
|
|
1066
1147
|
|
|
1067
1148
|
mp = MatchPerformance(
|
|
@@ -2,6 +2,10 @@ import polars as pl
|
|
|
2
2
|
|
|
3
3
|
from spforge.data_structures import ColumnNames
|
|
4
4
|
|
|
5
|
+
# Internal column names for scaled participation weights
|
|
6
|
+
_SCALED_PW = "__scaled_participation_weight__"
|
|
7
|
+
_SCALED_PPW = "__scaled_projected_participation_weight__"
|
|
8
|
+
|
|
5
9
|
|
|
6
10
|
def add_team_rating(
|
|
7
11
|
df: pl.DataFrame,
|
|
@@ -46,11 +50,14 @@ def add_team_rating_projected(
|
|
|
46
50
|
tid = column_names.team_id
|
|
47
51
|
ppw = column_names.projected_participation_weight
|
|
48
52
|
|
|
49
|
-
if
|
|
53
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
54
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
55
|
+
|
|
56
|
+
if weight_col and weight_col in df.columns:
|
|
50
57
|
return df.with_columns(
|
|
51
58
|
(
|
|
52
|
-
(pl.col(ppw) * pl.col(player_rating_col)).sum().over([mid, tid])
|
|
53
|
-
/ pl.col(ppw).sum().over([mid, tid])
|
|
59
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
|
|
60
|
+
/ pl.col(weight_col).sum().over([mid, tid])
|
|
54
61
|
).alias(team_rating_out)
|
|
55
62
|
)
|
|
56
63
|
|
|
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
|
|
|
118
125
|
mid = column_names.match_id
|
|
119
126
|
ppw = column_names.projected_participation_weight
|
|
120
127
|
|
|
121
|
-
if
|
|
128
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
129
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
130
|
+
|
|
131
|
+
if weight_col and weight_col in df.columns:
|
|
122
132
|
return df.with_columns(
|
|
123
133
|
(
|
|
124
|
-
(pl.col(ppw) * pl.col(player_rating_col)).sum().over(mid)
|
|
125
|
-
/ pl.col(ppw).sum().over(mid)
|
|
134
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
|
|
135
|
+
/ pl.col(weight_col).sum().over(mid)
|
|
126
136
|
).alias(rating_mean_out)
|
|
127
137
|
)
|
|
128
138
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.9
|
|
3
|
+
Version: 0.8.11
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -103,8 +103,10 @@ tests/hyperparameter_tuning/test_rating_tuner.py
|
|
|
103
103
|
tests/performance_transformers/test_performance_manager.py
|
|
104
104
|
tests/performance_transformers/test_performances_transformers.py
|
|
105
105
|
tests/ratings/test_player_rating_generator.py
|
|
106
|
+
tests/ratings/test_player_rating_no_mutation.py
|
|
106
107
|
tests/ratings/test_ratings_property.py
|
|
107
108
|
tests/ratings/test_team_rating_generator.py
|
|
109
|
+
tests/ratings/test_utils_scaled_weights.py
|
|
108
110
|
tests/scorer/test_score.py
|
|
109
111
|
tests/scorer/test_score_aggregation_granularity.py
|
|
110
112
|
tests/transformers/test_estimator_transformer_context.py
|
|
@@ -551,6 +551,63 @@ def test_fit_transform_scales_participation_weight_by_fit_quantile(base_cn):
|
|
|
551
551
|
assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
|
|
552
552
|
|
|
553
553
|
|
|
554
|
+
def test_fit_transform_auto_scales_participation_weight_when_out_of_bounds(base_cn):
|
|
555
|
+
"""Automatically enable scaling when participation weights exceed [0, 1]."""
|
|
556
|
+
df = pl.DataFrame(
|
|
557
|
+
{
|
|
558
|
+
"pid": ["P1", "P2", "O1", "O2"],
|
|
559
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
560
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
561
|
+
"dt": ["2024-01-01"] * 4,
|
|
562
|
+
"perf": [0.9, 0.9, 0.1, 0.1],
|
|
563
|
+
"pw": [10.0, 20.0, 10.0, 10.0],
|
|
564
|
+
}
|
|
565
|
+
)
|
|
566
|
+
gen = PlayerRatingGenerator(
|
|
567
|
+
performance_column="perf",
|
|
568
|
+
column_names=base_cn,
|
|
569
|
+
auto_scale_performance=True,
|
|
570
|
+
start_harcoded_start_rating=1000.0,
|
|
571
|
+
)
|
|
572
|
+
gen.fit_transform(df)
|
|
573
|
+
|
|
574
|
+
start_rating = 1000.0
|
|
575
|
+
p1_change = gen._player_off_ratings["P1"].rating_value - start_rating
|
|
576
|
+
p2_change = gen._player_off_ratings["P2"].rating_value - start_rating
|
|
577
|
+
|
|
578
|
+
q = df["pw"].quantile(0.99, "linear")
|
|
579
|
+
expected_ratio = min(1.0, 10.0 / q) / min(1.0, 20.0 / q)
|
|
580
|
+
|
|
581
|
+
assert gen.scale_participation_weights is True
|
|
582
|
+
assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def test_fit_transform_auto_scale_logs_warning_when_out_of_bounds(base_cn, caplog):
|
|
586
|
+
"""Auto-scaling should emit a warning when participation weights exceed [0, 1]."""
|
|
587
|
+
df = pl.DataFrame(
|
|
588
|
+
{
|
|
589
|
+
"pid": ["P1", "P2", "O1", "O2"],
|
|
590
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
591
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
592
|
+
"dt": ["2024-01-01"] * 4,
|
|
593
|
+
"perf": [0.9, 0.9, 0.1, 0.1],
|
|
594
|
+
"pw": [10.0, 20.0, 10.0, 10.0],
|
|
595
|
+
}
|
|
596
|
+
)
|
|
597
|
+
gen = PlayerRatingGenerator(
|
|
598
|
+
performance_column="perf",
|
|
599
|
+
column_names=base_cn,
|
|
600
|
+
auto_scale_performance=True,
|
|
601
|
+
start_harcoded_start_rating=1000.0,
|
|
602
|
+
)
|
|
603
|
+
with caplog.at_level("WARNING"):
|
|
604
|
+
gen.fit_transform(df)
|
|
605
|
+
|
|
606
|
+
assert any(
|
|
607
|
+
"Auto-scaling participation weights" in record.message for record in caplog.records
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
|
|
554
611
|
def test_future_transform_scales_projected_participation_weight_by_fit_quantile():
|
|
555
612
|
"""Future projected participation weights should scale with fit quantile and be clipped."""
|
|
556
613
|
cn = ColumnNames(
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Tests to ensure PlayerRatingGenerator does not mutate input columns."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from spforge import ColumnNames
|
|
7
|
+
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
|
|
11
|
+
def cn_with_projected():
|
|
12
|
+
"""ColumnNames with both participation_weight and projected_participation_weight."""
|
|
13
|
+
return ColumnNames(
|
|
14
|
+
player_id="pid",
|
|
15
|
+
team_id="tid",
|
|
16
|
+
match_id="mid",
|
|
17
|
+
start_date="dt",
|
|
18
|
+
update_match_id="mid",
|
|
19
|
+
participation_weight="minutes",
|
|
20
|
+
projected_participation_weight="minutes_prediction",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
|
|
25
|
+
def fit_df():
|
|
26
|
+
"""Training data with minutes > 1 (will trigger auto-scaling)."""
|
|
27
|
+
return pl.DataFrame(
|
|
28
|
+
{
|
|
29
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
30
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
31
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
32
|
+
"dt": ["2024-01-01"] * 4,
|
|
33
|
+
"perf": [0.6, 0.4, 0.7, 0.3],
|
|
34
|
+
"minutes": [30.0, 25.0, 32.0, 28.0],
|
|
35
|
+
"minutes_prediction": [28.0, 24.0, 30.0, 26.0],
|
|
36
|
+
}
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.fixture
|
|
41
|
+
def future_df():
|
|
42
|
+
"""Future prediction data with minutes > 1 (will trigger auto-scaling)."""
|
|
43
|
+
return pl.DataFrame(
|
|
44
|
+
{
|
|
45
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
46
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
47
|
+
"mid": ["M2", "M2", "M2", "M2"],
|
|
48
|
+
"dt": ["2024-01-02"] * 4,
|
|
49
|
+
"minutes": [30.0, 25.0, 32.0, 28.0],
|
|
50
|
+
"minutes_prediction": [28.0, 24.0, 30.0, 26.0],
|
|
51
|
+
}
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_fit_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df):
|
|
56
|
+
"""fit_transform should not modify the participation_weight column values."""
|
|
57
|
+
# Join result with original to compare values by player_id
|
|
58
|
+
gen = PlayerRatingGenerator(
|
|
59
|
+
performance_column="perf",
|
|
60
|
+
column_names=cn_with_projected,
|
|
61
|
+
auto_scale_performance=True,
|
|
62
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
63
|
+
)
|
|
64
|
+
result = gen.fit_transform(fit_df)
|
|
65
|
+
|
|
66
|
+
# Check that each player's minutes value is preserved
|
|
67
|
+
original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes"].to_list()))
|
|
68
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
|
|
69
|
+
|
|
70
|
+
for pid, original_val in original_by_player.items():
|
|
71
|
+
result_val = result_by_player[pid]
|
|
72
|
+
assert result_val == original_val, (
|
|
73
|
+
f"participation_weight for player {pid} was mutated. "
|
|
74
|
+
f"Expected {original_val}, got {result_val}"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_fit_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df):
|
|
79
|
+
"""fit_transform should not modify the projected_participation_weight column values."""
|
|
80
|
+
gen = PlayerRatingGenerator(
|
|
81
|
+
performance_column="perf",
|
|
82
|
+
column_names=cn_with_projected,
|
|
83
|
+
auto_scale_performance=True,
|
|
84
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
85
|
+
)
|
|
86
|
+
result = gen.fit_transform(fit_df)
|
|
87
|
+
|
|
88
|
+
# Check that each player's minutes_prediction value is preserved
|
|
89
|
+
original_by_player = dict(zip(fit_df["pid"].to_list(), fit_df["minutes_prediction"].to_list()))
|
|
90
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes_prediction"].to_list()))
|
|
91
|
+
|
|
92
|
+
for pid, original_val in original_by_player.items():
|
|
93
|
+
result_val = result_by_player[pid]
|
|
94
|
+
assert result_val == original_val, (
|
|
95
|
+
f"projected_participation_weight for player {pid} was mutated. "
|
|
96
|
+
f"Expected {original_val}, got {result_val}"
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
|
|
101
|
+
"""transform should not modify the participation_weight column values."""
|
|
102
|
+
gen = PlayerRatingGenerator(
|
|
103
|
+
performance_column="perf",
|
|
104
|
+
column_names=cn_with_projected,
|
|
105
|
+
auto_scale_performance=True,
|
|
106
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
107
|
+
)
|
|
108
|
+
gen.fit_transform(fit_df)
|
|
109
|
+
|
|
110
|
+
result = gen.transform(future_df)
|
|
111
|
+
|
|
112
|
+
# Check that each player's minutes value is preserved
|
|
113
|
+
original_by_player = dict(zip(future_df["pid"].to_list(), future_df["minutes"].to_list()))
|
|
114
|
+
result_by_player = dict(zip(result["pid"].to_list(), result["minutes"].to_list()))
|
|
115
|
+
|
|
116
|
+
for pid, original_val in original_by_player.items():
|
|
117
|
+
result_val = result_by_player[pid]
|
|
118
|
+
assert result_val == original_val, (
|
|
119
|
+
f"participation_weight for player {pid} was mutated during transform. "
|
|
120
|
+
f"Expected {original_val}, got {result_val}"
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
    """Check that ``transform`` returns every player's ``minutes_prediction`` value unchanged."""
    generator = PlayerRatingGenerator(
        performance_column="perf",
        column_names=cn_with_projected,
        auto_scale_performance=True,
        features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
    )
    generator.fit_transform(fit_df)

    transformed = generator.transform(future_df)

    # Key both frames by player id so the comparison does not depend on row order.
    # NOTE: a pid that occurs on several rows keeps only its last value in each dict.
    projected_before = dict(
        zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list())
    )
    projected_after = dict(
        zip(transformed["pid"].to_list(), transformed["minutes_prediction"].to_list())
    )

    for player_id, expected in projected_before.items():
        actual = projected_after[player_id]
        assert actual == expected, (
            f"projected_participation_weight for player {player_id} was mutated during transform. "
            f"Expected {expected}, got {actual}"
        )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_future_transform_does_not_mutate_participation_weight(cn_with_projected, fit_df, future_df):
    """Check that ``future_transform`` leaves the ``minutes`` column values as they were."""
    generator = PlayerRatingGenerator(
        performance_column="perf",
        column_names=cn_with_projected,
        auto_scale_performance=True,
        features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
    )
    generator.fit_transform(fit_df)

    # Snapshot the input values before the call so a change to the input frame
    # itself would also be noticed.
    minutes_before = future_df["minutes"].to_list()
    minutes_after = generator.future_transform(future_df)["minutes"].to_list()

    # Whole-list comparison: values must match position by position.
    assert minutes_after == minutes_before, (
        "participation_weight column was mutated during future_transform. "
        f"Expected {minutes_before}, got {minutes_after}"
    )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def test_future_transform_does_not_mutate_projected_participation_weight(cn_with_projected, fit_df, future_df):
    """Check that ``future_transform`` leaves the ``minutes_prediction`` column values as they were."""
    generator = PlayerRatingGenerator(
        performance_column="perf",
        column_names=cn_with_projected,
        auto_scale_performance=True,
        features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
    )
    generator.fit_transform(fit_df)

    # Snapshot the input values before the call so a change to the input frame
    # itself would also be noticed.
    projected_before = future_df["minutes_prediction"].to_list()
    projected_after = generator.future_transform(future_df)["minutes_prediction"].to_list()

    # Whole-list comparison: values must match position by position.
    assert projected_after == projected_before, (
        "projected_participation_weight column was mutated during future_transform. "
        f"Expected {projected_before}, got {projected_after}"
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_multiple_transforms_do_not_compound_scaling(cn_with_projected, fit_df, future_df):
    """Check that feeding ``transform`` output back into ``transform`` keeps ``minutes_prediction`` stable."""
    generator = PlayerRatingGenerator(
        performance_column="perf",
        column_names=cn_with_projected,
        auto_scale_performance=True,
        features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
    )
    generator.fit_transform(fit_df)

    # Run three passes, each consuming the previous pass's output.
    latest = future_df
    for _ in range(3):
        latest = generator.transform(latest)

    # Compare the last pass against the untouched input, keyed by player id.
    projected_before = dict(
        zip(future_df["pid"].to_list(), future_df["minutes_prediction"].to_list())
    )
    projected_after = dict(
        zip(latest["pid"].to_list(), latest["minutes_prediction"].to_list())
    )

    for player_id, expected in projected_before.items():
        actual = projected_after[player_id]
        assert actual == expected, (
            f"Multiple transforms compounded the scaling for player {player_id}. "
            f"Expected {expected}, got {actual}"
        )
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Tests to ensure utility functions use scaled participation weights when available."""
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from spforge import ColumnNames
|
|
7
|
+
from spforge.ratings.utils import (
|
|
8
|
+
_SCALED_PPW,
|
|
9
|
+
add_team_rating_projected,
|
|
10
|
+
add_rating_mean_projected,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture
def column_names():
    """Provide a ``ColumnNames`` whose projected participation weight column is ``ppw``."""
    names = ColumnNames(
        player_id="pid",
        team_id="tid",
        match_id="mid",
        start_date="dt",
        projected_participation_weight="ppw",
    )
    return names
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture
def df_with_scaled():
    """Build a four-player, single-match frame holding both raw ``ppw`` and ``_SCALED_PPW`` weights."""
    columns = {
        "pid": ["A", "B", "C", "D"],
        "tid": ["T1", "T1", "T2", "T2"],
        "mid": ["M1"] * 4,
        "dt": ["2024-01-01"] * 4,
        "rating": [1100.0, 900.0, 1050.0, 950.0],
        # Raw weights: using these would produce the wrong weighted ratings.
        "ppw": [20.0, 5.0, 10.0, 10.0],
        # Scaled/clipped weights that the tests expect to be used.
        _SCALED_PPW: [1.0, 0.5, 1.0, 1.0],
    }
    return pl.DataFrame(columns)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.fixture
def df_without_scaled():
    """Build a four-player, single-match frame with only the raw ``ppw`` column (no ``_SCALED_PPW``)."""
    columns = {
        "pid": ["A", "B", "C", "D"],
        "tid": ["T1", "T1", "T2", "T2"],
        "mid": ["M1"] * 4,
        "dt": ["2024-01-01"] * 4,
        "rating": [1100.0, 900.0, 1050.0, 950.0],
        # These raw values are already on a scaled (0-1) range.
        "ppw": [0.8, 0.4, 1.0, 1.0],
    }
    return pl.DataFrame(columns)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_add_team_rating_projected_uses_scaled_column(column_names, df_with_scaled):
    """Check that the T1 team rating is weighted by ``_SCALED_PPW`` rather than raw ``ppw``."""
    rated = add_team_rating_projected(
        df=df_with_scaled,
        column_names=column_names,
        player_rating_col="rating",
        team_rating_out="team_rating",
    )
    t1_rows = rated.filter(pl.col("tid") == "T1")
    t1_rating = t1_rows["team_rating"][0]

    # Scaled weights (1.0, 0.5): (1100*1.0 + 900*0.5) / (1.0 + 0.5) = 1550 / 1.5 = 1033.33
    scaled_expectation = (1100.0 * 1.0 + 900.0 * 0.5) / (1.0 + 0.5)
    # Raw weights (20.0, 5.0): (1100*20 + 900*5) / 25 = 26500 / 25 = 1060
    raw_expectation = (1100.0 * 20.0 + 900.0 * 5.0) / (20.0 + 5.0)

    assert t1_rating == pytest.approx(scaled_expectation, rel=1e-6)
    assert t1_rating != pytest.approx(raw_expectation, rel=1e-6)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_add_team_rating_projected_falls_back_to_raw(column_names, df_without_scaled):
    """Check that the T1 team rating is weighted by raw ``ppw`` when ``_SCALED_PPW`` is absent."""
    rated = add_team_rating_projected(
        df=df_without_scaled,
        column_names=column_names,
        player_rating_col="rating",
        team_rating_out="team_rating",
    )
    t1_rows = rated.filter(pl.col("tid") == "T1")
    t1_rating = t1_rows["team_rating"][0]

    # Raw weights (0.8, 0.4): (1100*0.8 + 900*0.4) / (0.8 + 0.4) = 1240 / 1.2 = 1033.33
    raw_expectation = (1100.0 * 0.8 + 900.0 * 0.4) / (0.8 + 0.4)
    assert t1_rating == pytest.approx(raw_expectation, rel=1e-6)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def test_add_rating_mean_projected_uses_scaled_column(column_names, df_with_scaled):
    """Check that the projected rating mean is weighted by ``_SCALED_PPW`` rather than raw ``ppw``."""
    averaged = add_rating_mean_projected(
        df=df_with_scaled,
        column_names=column_names,
        player_rating_col="rating",
        rating_mean_out="mean_rating",
    )
    mean_rating = averaged["mean_rating"][0]

    # Scaled weights (1.0, 0.5, 1.0, 1.0):
    # (1100 + 450 + 1050 + 950) / 3.5 = 3550 / 3.5 = 1014.29
    scaled_expectation = (
        1100.0 * 1.0 + 900.0 * 0.5 + 1050.0 * 1.0 + 950.0 * 1.0
    ) / (1.0 + 0.5 + 1.0 + 1.0)
    # Value the raw weights (20.0, 5.0, 10.0, 10.0) would give; must not match.
    raw_expectation = (
        1100.0 * 20.0 + 900.0 * 5.0 + 1050.0 * 10.0 + 950.0 * 10.0
    ) / (20.0 + 5.0 + 10.0 + 10.0)

    assert mean_rating == pytest.approx(scaled_expectation, rel=1e-6)
    assert mean_rating != pytest.approx(raw_expectation, rel=1e-6)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_add_rating_mean_projected_falls_back_to_raw(column_names, df_without_scaled):
    """Check that the projected rating mean is weighted by raw ``ppw`` when ``_SCALED_PPW`` is absent."""
    averaged = add_rating_mean_projected(
        df=df_without_scaled,
        column_names=column_names,
        player_rating_col="rating",
        rating_mean_out="mean_rating",
    )
    mean_rating = averaged["mean_rating"][0]

    # Raw weights (0.8, 0.4, 1.0, 1.0) applied to the four ratings.
    raw_expectation = (
        1100.0 * 0.8 + 900.0 * 0.4 + 1050.0 * 1.0 + 950.0 * 1.0
    ) / (0.8 + 0.4 + 1.0 + 1.0)
    assert mean_rating == pytest.approx(raw_expectation, rel=1e-6)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def test_scaled_weights_not_in_output(column_names, df_with_scaled):
    """Verify ``add_team_rating_projected`` adds only the requested output column.

    The ``_SCALED_PPW`` column supplied in the input must still be present in the
    result, and the only column the call introduces should be ``team_rating``.
    """
    input_columns = set(df_with_scaled.columns)

    result = add_team_rating_projected(
        df=df_with_scaled,
        column_names=column_names,
        player_rating_col="rating",
        team_rating_out="team_rating",
    )

    # The scaled column was part of the input, so it is expected to pass through.
    assert _SCALED_PPW in result.columns
    assert "team_rating" in result.columns

    # Enforce the "no new internal columns" expectation instead of only stating it.
    added_columns = set(result.columns) - input_columns
    unexpected = sorted(added_columns - {"team_rating"})
    assert added_columns == {"team_rating"}, (
        f"Unexpected columns added to output: {unexpected}"
    )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.9 → spforge-0.8.11}/spforge/performance_transformers/_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.9 → spforge-0.8.11}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.9 → spforge-0.8.11}/tests/feature_generator/test_regressor_feature_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.9 → spforge-0.8.11}/tests/performance_transformers/test_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|