spforge 0.8.23__py3-none-any.whl → 0.8.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- spforge/feature_generator/_base.py +2 -0
- spforge/ratings/_player_rating.py +80 -16
- spforge/ratings/player_performance_predictor.py +1 -1
- {spforge-0.8.23.dist-info → spforge-0.8.25.dist-info}/METADATA +1 -1
- {spforge-0.8.23.dist-info → spforge-0.8.25.dist-info}/RECORD +10 -10
- tests/feature_generator/test_rolling_window.py +36 -0
- tests/ratings/test_player_rating_generator.py +280 -0
- {spforge-0.8.23.dist-info → spforge-0.8.25.dist-info}/WHEEL +0 -0
- {spforge-0.8.23.dist-info → spforge-0.8.25.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.23.dist-info → spforge-0.8.25.dist-info}/top_level.txt +0 -0
|
@@ -330,7 +330,21 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
330
330
|
df = df.drop(cols_to_drop)
|
|
331
331
|
return df
|
|
332
332
|
|
|
333
|
+
def _validate_playing_time_columns(self, df: pl.DataFrame) -> None:
|
|
334
|
+
cn = self.column_names
|
|
335
|
+
if cn.team_players_playing_time and cn.team_players_playing_time not in df.columns:
|
|
336
|
+
raise ValueError(
|
|
337
|
+
f"team_players_playing_time column '{cn.team_players_playing_time}' "
|
|
338
|
+
f"not found in DataFrame. Available columns: {list(df.columns)}"
|
|
339
|
+
)
|
|
340
|
+
if cn.opponent_players_playing_time and cn.opponent_players_playing_time not in df.columns:
|
|
341
|
+
raise ValueError(
|
|
342
|
+
f"opponent_players_playing_time column '{cn.opponent_players_playing_time}' "
|
|
343
|
+
f"not found in DataFrame. Available columns: {list(df.columns)}"
|
|
344
|
+
)
|
|
345
|
+
|
|
333
346
|
def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
347
|
+
self._validate_playing_time_columns(df)
|
|
334
348
|
df = self._scale_participation_weight_columns(df)
|
|
335
349
|
match_df = self._create_match_df(df)
|
|
336
350
|
ratings = self._calculate_ratings(match_df)
|
|
@@ -359,6 +373,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
359
373
|
return self._remove_internal_scaled_columns(result)
|
|
360
374
|
|
|
361
375
|
def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
|
|
376
|
+
self._validate_playing_time_columns(df)
|
|
362
377
|
df = self._scale_participation_weight_columns(df)
|
|
363
378
|
match_df = self._create_match_df(df)
|
|
364
379
|
ratings = self._calculate_future_ratings(match_df)
|
|
@@ -466,10 +481,14 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
466
481
|
pred_off = self._performance_predictor.predict_performance(
|
|
467
482
|
player_rating=pre_player,
|
|
468
483
|
opponent_team_rating=PreMatchTeamRating(
|
|
469
|
-
id=team2,
|
|
484
|
+
id=team2,
|
|
485
|
+
players=c2.pre_match_player_ratings,
|
|
486
|
+
rating_value=team2_def_rating,
|
|
470
487
|
),
|
|
471
488
|
team_rating=PreMatchTeamRating(
|
|
472
|
-
id=team1,
|
|
489
|
+
id=team1,
|
|
490
|
+
players=c1.pre_match_player_ratings,
|
|
491
|
+
rating_value=team1_off_rating,
|
|
473
492
|
),
|
|
474
493
|
)
|
|
475
494
|
|
|
@@ -484,10 +503,14 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
484
503
|
other=getattr(pre_player, "other", None),
|
|
485
504
|
),
|
|
486
505
|
opponent_team_rating=PreMatchTeamRating(
|
|
487
|
-
id=team2,
|
|
506
|
+
id=team2,
|
|
507
|
+
players=c2.pre_match_player_ratings,
|
|
508
|
+
rating_value=team2_off_rating,
|
|
488
509
|
),
|
|
489
510
|
team_rating=PreMatchTeamRating(
|
|
490
|
-
id=team1,
|
|
511
|
+
id=team1,
|
|
512
|
+
players=c1.pre_match_player_ratings,
|
|
513
|
+
rating_value=team1_def_rating,
|
|
491
514
|
),
|
|
492
515
|
)
|
|
493
516
|
|
|
@@ -551,10 +574,14 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
551
574
|
pred_off = self._performance_predictor.predict_performance(
|
|
552
575
|
player_rating=pre_player,
|
|
553
576
|
opponent_team_rating=PreMatchTeamRating(
|
|
554
|
-
id=team1,
|
|
577
|
+
id=team1,
|
|
578
|
+
players=c1.pre_match_player_ratings,
|
|
579
|
+
rating_value=team1_def_rating,
|
|
555
580
|
),
|
|
556
581
|
team_rating=PreMatchTeamRating(
|
|
557
|
-
id=team2,
|
|
582
|
+
id=team2,
|
|
583
|
+
players=c2.pre_match_player_ratings,
|
|
584
|
+
rating_value=team2_off_rating,
|
|
558
585
|
),
|
|
559
586
|
)
|
|
560
587
|
|
|
@@ -569,10 +596,14 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
569
596
|
other=getattr(pre_player, "other", None),
|
|
570
597
|
),
|
|
571
598
|
opponent_team_rating=PreMatchTeamRating(
|
|
572
|
-
id=team1,
|
|
599
|
+
id=team1,
|
|
600
|
+
players=c1.pre_match_player_ratings,
|
|
601
|
+
rating_value=team1_off_rating,
|
|
573
602
|
),
|
|
574
603
|
team_rating=PreMatchTeamRating(
|
|
575
|
-
id=team2,
|
|
604
|
+
id=team2,
|
|
605
|
+
players=c2.pre_match_player_ratings,
|
|
606
|
+
rating_value=team2_def_rating,
|
|
576
607
|
),
|
|
577
608
|
)
|
|
578
609
|
|
|
@@ -881,6 +912,12 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
881
912
|
if cn.league and cn.league in df.columns:
|
|
882
913
|
player_stat_cols.append(cn.league)
|
|
883
914
|
|
|
915
|
+
if cn.team_players_playing_time and cn.team_players_playing_time in df.columns:
|
|
916
|
+
player_stat_cols.append(cn.team_players_playing_time)
|
|
917
|
+
|
|
918
|
+
if cn.opponent_players_playing_time and cn.opponent_players_playing_time in df.columns:
|
|
919
|
+
player_stat_cols.append(cn.opponent_players_playing_time)
|
|
920
|
+
|
|
884
921
|
df = df.with_columns(pl.struct(player_stat_cols).alias(PLAYER_STATS))
|
|
885
922
|
|
|
886
923
|
group_cols = [cn.match_id, cn.team_id, cn.start_date]
|
|
@@ -957,10 +994,24 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
957
994
|
else None
|
|
958
995
|
)
|
|
959
996
|
|
|
997
|
+
team_playing_time = None
|
|
998
|
+
opponent_playing_time = None
|
|
999
|
+
if cn.team_players_playing_time:
|
|
1000
|
+
raw_value = team_player.get(cn.team_players_playing_time)
|
|
1001
|
+
if raw_value is not None:
|
|
1002
|
+
team_playing_time = raw_value
|
|
1003
|
+
|
|
1004
|
+
if cn.opponent_players_playing_time:
|
|
1005
|
+
raw_value = team_player.get(cn.opponent_players_playing_time)
|
|
1006
|
+
if raw_value is not None:
|
|
1007
|
+
opponent_playing_time = raw_value
|
|
1008
|
+
|
|
960
1009
|
mp = MatchPerformance(
|
|
961
1010
|
performance_value=perf_val,
|
|
962
1011
|
projected_participation_weight=projected_participation_weight,
|
|
963
1012
|
participation_weight=participation_weight,
|
|
1013
|
+
team_players_playing_time=team_playing_time,
|
|
1014
|
+
opponent_players_playing_time=opponent_playing_time,
|
|
964
1015
|
)
|
|
965
1016
|
|
|
966
1017
|
if player_id in self._player_off_ratings and player_id in self._player_def_ratings:
|
|
@@ -1194,10 +1245,23 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1194
1245
|
ppw = pw
|
|
1195
1246
|
proj_w.append(float(ppw))
|
|
1196
1247
|
|
|
1248
|
+
team_playing_time = None
|
|
1249
|
+
opponent_playing_time = None
|
|
1250
|
+
if cn.team_players_playing_time:
|
|
1251
|
+
raw_value = tp.get(cn.team_players_playing_time)
|
|
1252
|
+
if raw_value is not None:
|
|
1253
|
+
team_playing_time = raw_value
|
|
1254
|
+
if cn.opponent_players_playing_time:
|
|
1255
|
+
raw_value = tp.get(cn.opponent_players_playing_time)
|
|
1256
|
+
if raw_value is not None:
|
|
1257
|
+
opponent_playing_time = raw_value
|
|
1258
|
+
|
|
1197
1259
|
mp = MatchPerformance(
|
|
1198
1260
|
performance_value=get_perf_value(tp),
|
|
1199
1261
|
projected_participation_weight=ppw,
|
|
1200
1262
|
participation_weight=pw,
|
|
1263
|
+
team_players_playing_time=team_playing_time,
|
|
1264
|
+
opponent_players_playing_time=opponent_playing_time,
|
|
1201
1265
|
)
|
|
1202
1266
|
|
|
1203
1267
|
ensure_new_player(pid, day_number, mp, league, position, pre_list) # noqa: B023
|
|
@@ -1250,10 +1314,10 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1250
1314
|
pred_off = self._performance_predictor.predict_performance(
|
|
1251
1315
|
player_rating=pre,
|
|
1252
1316
|
opponent_team_rating=PreMatchTeamRating(
|
|
1253
|
-
id=team2, players=
|
|
1317
|
+
id=team2, players=t2_pre, rating_value=t2_def_rating
|
|
1254
1318
|
),
|
|
1255
1319
|
team_rating=PreMatchTeamRating(
|
|
1256
|
-
id=team1, players=
|
|
1320
|
+
id=team1, players=t1_pre, rating_value=t1_off_rating
|
|
1257
1321
|
),
|
|
1258
1322
|
)
|
|
1259
1323
|
|
|
@@ -1267,10 +1331,10 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1267
1331
|
position=pre.position,
|
|
1268
1332
|
),
|
|
1269
1333
|
opponent_team_rating=PreMatchTeamRating(
|
|
1270
|
-
id=team2, players=
|
|
1334
|
+
id=team2, players=t2_pre, rating_value=t2_off_rating
|
|
1271
1335
|
),
|
|
1272
1336
|
team_rating=PreMatchTeamRating(
|
|
1273
|
-
id=team1, players=
|
|
1337
|
+
id=team1, players=t1_pre, rating_value=t1_def_rating
|
|
1274
1338
|
),
|
|
1275
1339
|
)
|
|
1276
1340
|
|
|
@@ -1295,10 +1359,10 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1295
1359
|
pred_off = self._performance_predictor.predict_performance(
|
|
1296
1360
|
player_rating=pre,
|
|
1297
1361
|
opponent_team_rating=PreMatchTeamRating(
|
|
1298
|
-
id=team1, players=
|
|
1362
|
+
id=team1, players=t1_pre, rating_value=t1_def_rating
|
|
1299
1363
|
),
|
|
1300
1364
|
team_rating=PreMatchTeamRating(
|
|
1301
|
-
id=team2, players=
|
|
1365
|
+
id=team2, players=t2_pre, rating_value=t2_off_rating
|
|
1302
1366
|
),
|
|
1303
1367
|
)
|
|
1304
1368
|
|
|
@@ -1312,10 +1376,10 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1312
1376
|
position=pre.position,
|
|
1313
1377
|
),
|
|
1314
1378
|
opponent_team_rating=PreMatchTeamRating(
|
|
1315
|
-
id=team1, players=
|
|
1379
|
+
id=team1, players=t1_pre, rating_value=t1_off_rating
|
|
1316
1380
|
),
|
|
1317
1381
|
team_rating=PreMatchTeamRating(
|
|
1318
|
-
id=team2, players=
|
|
1382
|
+
id=team2, players=t2_pre, rating_value=t2_def_rating
|
|
1319
1383
|
),
|
|
1320
1384
|
)
|
|
1321
1385
|
|
|
@@ -133,7 +133,7 @@ class RatingPlayerDifferencePerformancePredictor(PlayerPerformancePredictor):
|
|
|
133
133
|
team_rating_value = team_rating.rating_value
|
|
134
134
|
|
|
135
135
|
if player_rating.match_performance.opponent_players_playing_time and isinstance(
|
|
136
|
-
player_rating.match_performance.
|
|
136
|
+
player_rating.match_performance.opponent_players_playing_time, dict
|
|
137
137
|
):
|
|
138
138
|
weight_opp_rating = 0
|
|
139
139
|
sum_playing_time = 0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.23
|
|
3
|
+
Version: 0.8.25
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -34,7 +34,7 @@ spforge/estimator/_group_by_estimator.py,sha256=o-xv_PJJyWBaKv5Eo4EPbOvb9i0CuebZ
|
|
|
34
34
|
spforge/estimator/_ordinal_classifier.py,sha256=j_dfVHeX-6eZgPwwsYbkbP6bPrKH2a5S-N8vfP5hneA,1993
|
|
35
35
|
spforge/estimator/_sklearn_enhancer_estimator.py,sha256=DZ-UlmeazXPd6uEnlbVv79syZ5FPa64voUyKArtjjUs,4664
|
|
36
36
|
spforge/feature_generator/__init__.py,sha256=wfLfUkC_lLOCpy7NgDytK-l3HUAuhikuQXdKCgSGbuA,556
|
|
37
|
-
spforge/feature_generator/_base.py,sha256=
|
|
37
|
+
spforge/feature_generator/_base.py,sha256=eL0P4RRqSFaekko_RxtHKs5UXSCxdR3CG57Yvo7ryBo,16341
|
|
38
38
|
spforge/feature_generator/_lag.py,sha256=Qe34y_iQ90GKlIDfXiYahRobAZB8J-BE1MCrfSPuCSY,6821
|
|
39
39
|
spforge/feature_generator/_net_over_predicted.py,sha256=nngVzgLLxgOj8d9avSJCXaC_jNVOl33pWpQJB9RAKTU,2092
|
|
40
40
|
spforge/feature_generator/_regressor_feature_generator.py,sha256=CM8fPbbX5A_wgT5AT0zbs3YBgsZIVKE74C9vS6V6Q4U,5043
|
|
@@ -51,12 +51,12 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
|
|
|
51
51
|
spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
|
|
52
52
|
spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
|
|
53
53
|
spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
|
|
54
|
-
spforge/ratings/_player_rating.py,sha256=
|
|
54
|
+
spforge/ratings/_player_rating.py,sha256=zltf4utwzKQxkTA8DAPZ4LWRDlwGxoiKFaiPIo4sdNw,60323
|
|
55
55
|
spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
|
|
56
56
|
spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
|
|
57
57
|
spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
|
|
58
58
|
spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
|
|
59
|
-
spforge/ratings/player_performance_predictor.py,sha256=
|
|
59
|
+
spforge/ratings/player_performance_predictor.py,sha256=UPzOEbougHT6FcmOiuTa3vEM6q8FZq-SjKb0AqD0JS4,8365
|
|
60
60
|
spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
|
|
61
61
|
spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
|
|
62
62
|
spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
|
|
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
|
|
|
71
71
|
spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
|
|
72
72
|
spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
|
|
73
73
|
spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
|
|
74
|
-
spforge-0.8.23.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
|
+
spforge-0.8.25.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
75
75
|
tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
|
|
76
76
|
tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
|
|
77
77
|
tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
|
|
@@ -89,12 +89,12 @@ tests/feature_generator/test_regressor_feature_generator.py,sha256=3Wfw1NbD11p2N
|
|
|
89
89
|
tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7ASx8xVKuovDoHwK7L0-lAnzv1YQMs,5667
|
|
90
90
|
tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
|
|
91
91
|
tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
|
|
92
|
-
tests/feature_generator/test_rolling_window.py,sha256=
|
|
92
|
+
tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
|
|
93
93
|
tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
|
|
94
94
|
tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
|
|
95
95
|
tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
|
|
96
96
|
tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
|
|
97
|
-
tests/ratings/test_player_rating_generator.py,sha256=
|
|
97
|
+
tests/ratings/test_player_rating_generator.py,sha256=Z66LN1-YdUHrS6dszWZf4HeENRyH8oEtu4Nlsh1MpMI,82442
|
|
98
98
|
tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
|
|
99
99
|
tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
|
|
100
100
|
tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
|
|
@@ -107,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
107
107
|
tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
|
|
108
108
|
tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
|
|
109
109
|
tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
|
|
110
|
-
spforge-0.8.
|
|
111
|
-
spforge-0.8.23.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
112
|
-
spforge-0.8.23.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
113
|
-
spforge-0.8.23.dist-info/RECORD,,
|
|
110
|
+
spforge-0.8.25.dist-info/METADATA,sha256=JwBRy1-fD-a4UzeS_DeCv9AoXfbvbI7DghMls363RQ4,20048
|
|
111
|
+
spforge-0.8.25.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
112
|
+
spforge-0.8.25.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
113
|
+
spforge-0.8.25.dist-info/RECORD,,
|
|
@@ -684,3 +684,39 @@ def test_rolling_mean_historical_transform_higher_granularity(column_names, use_
|
|
|
684
684
|
}
|
|
685
685
|
)
|
|
686
686
|
pd.testing.assert_frame_equal(transformed_df, expected_df, check_like=True, check_dtype=False)
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
@pytest.mark.parametrize("df", [pd.DataFrame, pl.DataFrame])
|
|
690
|
+
def test_rolling_window__feature_also_used_as_column_names_field(df):
|
|
691
|
+
column_names = ColumnNames(
|
|
692
|
+
match_id="game_id",
|
|
693
|
+
player_id="player_id",
|
|
694
|
+
team_id="team_id",
|
|
695
|
+
start_date="game_date",
|
|
696
|
+
participation_weight="three_pointers_attempted",
|
|
697
|
+
)
|
|
698
|
+
data = df(
|
|
699
|
+
{
|
|
700
|
+
"game_id": [1, 1, 2, 2],
|
|
701
|
+
"player_id": ["a", "b", "a", "b"],
|
|
702
|
+
"team_id": [1, 2, 1, 2],
|
|
703
|
+
"game_date": [
|
|
704
|
+
pd.to_datetime("2023-01-01"),
|
|
705
|
+
pd.to_datetime("2023-01-01"),
|
|
706
|
+
pd.to_datetime("2023-01-02"),
|
|
707
|
+
pd.to_datetime("2023-01-02"),
|
|
708
|
+
],
|
|
709
|
+
"three_pointers_attempted": [5.0, 3.0, 7.0, 4.0],
|
|
710
|
+
}
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
transformer = RollingWindowTransformer(
|
|
714
|
+
features=["three_pointers_attempted"],
|
|
715
|
+
window=20,
|
|
716
|
+
granularity=["player_id"],
|
|
717
|
+
)
|
|
718
|
+
|
|
719
|
+
transformed_df = transformer.fit_transform(data, column_names=column_names)
|
|
720
|
+
|
|
721
|
+
assert transformer.features_out[0] in transformed_df.columns
|
|
722
|
+
assert len(transformed_df) == len(data)
|
|
@@ -2039,3 +2039,283 @@ def test_fit_transform_when_all_players_have_null_performance_then_no_rating_cha
|
|
|
2039
2039
|
f"Before={p1_off_before_m2}, After={p1_off_after_m2}. "
|
|
2040
2040
|
"Null performance should result in no rating change."
|
|
2041
2041
|
)
|
|
2042
|
+
|
|
2043
|
+
|
|
2044
|
+
# --- team_players_playing_time Tests ---
|
|
2045
|
+
|
|
2046
|
+
|
|
2047
|
+
def test_fit_transform_team_players_playing_time_column_not_found_raises_error(base_cn):
|
|
2048
|
+
"""Specifying a nonexistent team_players_playing_time column should raise ValueError."""
|
|
2049
|
+
from dataclasses import replace
|
|
2050
|
+
|
|
2051
|
+
cn = replace(base_cn, team_players_playing_time="nonexistent_column")
|
|
2052
|
+
|
|
2053
|
+
df = pl.DataFrame(
|
|
2054
|
+
{
|
|
2055
|
+
"pid": ["P1", "P2"],
|
|
2056
|
+
"tid": ["T1", "T2"],
|
|
2057
|
+
"mid": ["M1", "M1"],
|
|
2058
|
+
"dt": ["2024-01-01", "2024-01-01"],
|
|
2059
|
+
"perf": [0.6, 0.4],
|
|
2060
|
+
"pw": [1.0, 1.0],
|
|
2061
|
+
}
|
|
2062
|
+
)
|
|
2063
|
+
|
|
2064
|
+
gen = PlayerRatingGenerator(
|
|
2065
|
+
performance_column="perf",
|
|
2066
|
+
column_names=cn,
|
|
2067
|
+
)
|
|
2068
|
+
|
|
2069
|
+
with pytest.raises(ValueError, match="team_players_playing_time column"):
|
|
2070
|
+
gen.fit_transform(df)
|
|
2071
|
+
|
|
2072
|
+
|
|
2073
|
+
def test_fit_transform_opponent_players_playing_time_column_not_found_raises_error(base_cn):
|
|
2074
|
+
"""Specifying a nonexistent opponent_players_playing_time column should raise ValueError."""
|
|
2075
|
+
from dataclasses import replace
|
|
2076
|
+
|
|
2077
|
+
cn = replace(base_cn, opponent_players_playing_time="nonexistent_column")
|
|
2078
|
+
|
|
2079
|
+
df = pl.DataFrame(
|
|
2080
|
+
{
|
|
2081
|
+
"pid": ["P1", "P2"],
|
|
2082
|
+
"tid": ["T1", "T2"],
|
|
2083
|
+
"mid": ["M1", "M1"],
|
|
2084
|
+
"dt": ["2024-01-01", "2024-01-01"],
|
|
2085
|
+
"perf": [0.6, 0.4],
|
|
2086
|
+
"pw": [1.0, 1.0],
|
|
2087
|
+
}
|
|
2088
|
+
)
|
|
2089
|
+
|
|
2090
|
+
gen = PlayerRatingGenerator(
|
|
2091
|
+
performance_column="perf",
|
|
2092
|
+
column_names=cn,
|
|
2093
|
+
)
|
|
2094
|
+
|
|
2095
|
+
with pytest.raises(ValueError, match="opponent_players_playing_time column"):
|
|
2096
|
+
gen.fit_transform(df)
|
|
2097
|
+
|
|
2098
|
+
|
|
2099
|
+
def test_fit_transform_null_playing_time_uses_standard_team_rating(base_cn):
|
|
2100
|
+
"""When team_players_playing_time is null for a row, should use standard team rating."""
|
|
2101
|
+
from dataclasses import replace
|
|
2102
|
+
|
|
2103
|
+
cn = replace(
|
|
2104
|
+
base_cn,
|
|
2105
|
+
team_players_playing_time="team_pt",
|
|
2106
|
+
opponent_players_playing_time="opp_pt",
|
|
2107
|
+
)
|
|
2108
|
+
|
|
2109
|
+
# First establish ratings with a normal match (no playing time data)
|
|
2110
|
+
df1 = pl.DataFrame(
|
|
2111
|
+
{
|
|
2112
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2113
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2114
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
2115
|
+
"dt": ["2024-01-01"] * 4,
|
|
2116
|
+
"perf": [0.8, 0.6, 0.4, 0.2],
|
|
2117
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2118
|
+
"team_pt": [None, None, None, None],
|
|
2119
|
+
"opp_pt": [None, None, None, None],
|
|
2120
|
+
}
|
|
2121
|
+
)
|
|
2122
|
+
|
|
2123
|
+
gen = PlayerRatingGenerator(
|
|
2124
|
+
performance_column="perf",
|
|
2125
|
+
column_names=cn,
|
|
2126
|
+
auto_scale_performance=True,
|
|
2127
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
2128
|
+
non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
|
|
2129
|
+
)
|
|
2130
|
+
|
|
2131
|
+
result = gen.fit_transform(df1)
|
|
2132
|
+
|
|
2133
|
+
# Should work without error and produce predictions
|
|
2134
|
+
assert len(result) == 4
|
|
2135
|
+
assert "player_predicted_off_performance_perf" in result.columns
|
|
2136
|
+
|
|
2137
|
+
# All predictions should be valid (between 0 and 1)
|
|
2138
|
+
predictions = result["player_predicted_off_performance_perf"].to_list()
|
|
2139
|
+
for pred in predictions:
|
|
2140
|
+
assert 0.0 <= pred <= 1.0
|
|
2141
|
+
|
|
2142
|
+
|
|
2143
|
+
def test_fit_transform_weighted_calculation_with_playing_time(base_cn):
|
|
2144
|
+
"""Test that playing time weighted calculation produces different predictions."""
|
|
2145
|
+
from dataclasses import replace
|
|
2146
|
+
|
|
2147
|
+
cn = replace(
|
|
2148
|
+
base_cn,
|
|
2149
|
+
team_players_playing_time="team_pt",
|
|
2150
|
+
opponent_players_playing_time="opp_pt",
|
|
2151
|
+
)
|
|
2152
|
+
|
|
2153
|
+
# First establish different ratings for players
|
|
2154
|
+
df1 = pl.DataFrame(
|
|
2155
|
+
{
|
|
2156
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2157
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2158
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
2159
|
+
"dt": ["2024-01-01"] * 4,
|
|
2160
|
+
"perf": [0.9, 0.1, 0.5, 0.5], # P1 high rating, P2 low rating
|
|
2161
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2162
|
+
"team_pt": [None, None, None, None],
|
|
2163
|
+
"opp_pt": [None, None, None, None],
|
|
2164
|
+
}
|
|
2165
|
+
)
|
|
2166
|
+
|
|
2167
|
+
gen = PlayerRatingGenerator(
|
|
2168
|
+
performance_column="perf",
|
|
2169
|
+
column_names=cn,
|
|
2170
|
+
auto_scale_performance=True,
|
|
2171
|
+
start_harcoded_start_rating=1000.0,
|
|
2172
|
+
non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
|
|
2173
|
+
)
|
|
2174
|
+
gen.fit_transform(df1)
|
|
2175
|
+
|
|
2176
|
+
# Verify P1 and P2 have different ratings now
|
|
2177
|
+
p1_rating = gen._player_off_ratings["P1"].rating_value
|
|
2178
|
+
p2_rating = gen._player_off_ratings["P2"].rating_value
|
|
2179
|
+
assert p1_rating > p2_rating, "Setup: P1 should have higher rating than P2"
|
|
2180
|
+
|
|
2181
|
+
# Second match with playing time data
|
|
2182
|
+
# P3 faces opponent P1 80% of time (high rating), P4 faces P2 80% of time (low rating)
|
|
2183
|
+
# Use consistent schema for all dict entries (all keys present in all rows)
|
|
2184
|
+
df2 = pl.DataFrame(
|
|
2185
|
+
{
|
|
2186
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2187
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2188
|
+
"mid": ["M2", "M2", "M2", "M2"],
|
|
2189
|
+
"dt": ["2024-01-02"] * 4,
|
|
2190
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2191
|
+
# Team playing time - who they play WITH on same team
|
|
2192
|
+
"team_pt": [
|
|
2193
|
+
{"P1": 0.0, "P2": 1.0, "P3": 0.5, "P4": 0.5}, # P1 on T1, plays with P2
|
|
2194
|
+
{"P1": 1.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 on T1, plays with P1
|
|
2195
|
+
{"P1": 0.5, "P2": 0.5, "P3": 0.0, "P4": 1.0}, # P3 on T2, plays with P4
|
|
2196
|
+
{"P1": 0.5, "P2": 0.5, "P3": 1.0, "P4": 0.0}, # P4 on T2, plays with P3
|
|
2197
|
+
],
|
|
2198
|
+
# Opponent playing time - who they face on opposing team
|
|
2199
|
+
"opp_pt": [
|
|
2200
|
+
{"P1": 0.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P1 faces T2 opponents evenly
|
|
2201
|
+
{"P1": 0.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 faces T2 opponents evenly
|
|
2202
|
+
{"P1": 0.8, "P2": 0.2, "P3": 0.0, "P4": 0.0}, # P3 faces P1 80% of time
|
|
2203
|
+
{"P1": 0.2, "P2": 0.8, "P3": 0.0, "P4": 0.0}, # P4 faces P2 80% of time
|
|
2204
|
+
],
|
|
2205
|
+
}
|
|
2206
|
+
)
|
|
2207
|
+
|
|
2208
|
+
result = gen.future_transform(df2)
|
|
2209
|
+
|
|
2210
|
+
# Verify we get predictions
|
|
2211
|
+
assert len(result) == 4
|
|
2212
|
+
|
|
2213
|
+
# Get predictions for P3 and P4
|
|
2214
|
+
# P3 faces stronger opponents (mainly P1), P4 faces weaker opponents (mainly P2)
|
|
2215
|
+
# So P3 should have lower predicted performance than P4 (all else equal)
|
|
2216
|
+
p3_pred = result.filter(pl.col("pid") == "P3")["player_predicted_off_performance_perf"][0]
|
|
2217
|
+
p4_pred = result.filter(pl.col("pid") == "P4")["player_predicted_off_performance_perf"][0]
|
|
2218
|
+
|
|
2219
|
+
# P3 faces P1 (high rating) 80% of time, P4 faces P2 (low rating) 80% of time
|
|
2220
|
+
# So P4 should have higher predicted performance
|
|
2221
|
+
assert p4_pred > p3_pred, (
|
|
2222
|
+
f"P4 (facing weak opponents) should have higher prediction than P3 (facing strong opponents). "
|
|
2223
|
+
f"P3 pred={p3_pred:.4f}, P4 pred={p4_pred:.4f}"
|
|
2224
|
+
)
|
|
2225
|
+
|
|
2226
|
+
|
|
2227
|
+
def test_future_transform_weighted_calculation_with_playing_time(base_cn):
|
|
2228
|
+
"""Test that future_transform correctly uses playing time weights."""
|
|
2229
|
+
from dataclasses import replace
|
|
2230
|
+
|
|
2231
|
+
cn = replace(
|
|
2232
|
+
base_cn,
|
|
2233
|
+
team_players_playing_time="team_pt",
|
|
2234
|
+
opponent_players_playing_time="opp_pt",
|
|
2235
|
+
)
|
|
2236
|
+
|
|
2237
|
+
# First establish ratings
|
|
2238
|
+
df1 = pl.DataFrame(
|
|
2239
|
+
{
|
|
2240
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2241
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2242
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
2243
|
+
"dt": ["2024-01-01"] * 4,
|
|
2244
|
+
"perf": [0.9, 0.1, 0.5, 0.5],
|
|
2245
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2246
|
+
"team_pt": [None, None, None, None],
|
|
2247
|
+
"opp_pt": [None, None, None, None],
|
|
2248
|
+
}
|
|
2249
|
+
)
|
|
2250
|
+
|
|
2251
|
+
gen = PlayerRatingGenerator(
|
|
2252
|
+
performance_column="perf",
|
|
2253
|
+
column_names=cn,
|
|
2254
|
+
auto_scale_performance=True,
|
|
2255
|
+
start_harcoded_start_rating=1000.0,
|
|
2256
|
+
non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
|
|
2257
|
+
)
|
|
2258
|
+
gen.fit_transform(df1)
|
|
2259
|
+
|
|
2260
|
+
# Future match with playing time weights (consistent schema)
|
|
2261
|
+
future_df = pl.DataFrame(
|
|
2262
|
+
{
|
|
2263
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2264
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2265
|
+
"mid": ["M2", "M2", "M2", "M2"],
|
|
2266
|
+
"dt": ["2024-01-02"] * 4,
|
|
2267
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2268
|
+
"team_pt": [
|
|
2269
|
+
{"P1": 0.0, "P2": 1.0, "P3": 0.5, "P4": 0.5}, # P1 plays with P2
|
|
2270
|
+
{"P1": 1.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 plays with P1
|
|
2271
|
+
{"P1": 0.5, "P2": 0.5, "P3": 0.0, "P4": 1.0}, # P3 plays with P4
|
|
2272
|
+
{"P1": 0.5, "P2": 0.5, "P3": 1.0, "P4": 0.0}, # P4 plays with P3
|
|
2273
|
+
],
|
|
2274
|
+
"opp_pt": [
|
|
2275
|
+
{"P1": 0.0, "P2": 0.0, "P3": 1.0, "P4": 0.0}, # P1 faces only P3
|
|
2276
|
+
{"P1": 0.0, "P2": 0.0, "P3": 0.0, "P4": 1.0}, # P2 faces only P4
|
|
2277
|
+
{"P1": 1.0, "P2": 0.0, "P3": 0.0, "P4": 0.0}, # P3 faces only P1
|
|
2278
|
+
{"P1": 0.0, "P2": 1.0, "P3": 0.0, "P4": 0.0}, # P4 faces only P2
|
|
2279
|
+
],
|
|
2280
|
+
}
|
|
2281
|
+
)
|
|
2282
|
+
|
|
2283
|
+
result = gen.future_transform(future_df)
|
|
2284
|
+
|
|
2285
|
+
# Verify predictions are valid
|
|
2286
|
+
assert len(result) == 4
|
|
2287
|
+
predictions = result["player_predicted_off_performance_perf"].to_list()
|
|
2288
|
+
for pred in predictions:
|
|
2289
|
+
assert 0.0 <= pred <= 1.0
|
|
2290
|
+
|
|
2291
|
+
|
|
2292
|
+
def test_fit_transform_backward_compatible_without_playing_time_columns(base_cn):
|
|
2293
|
+
"""Behavior should be unchanged when team_players_playing_time columns are not specified."""
|
|
2294
|
+
df = pl.DataFrame(
|
|
2295
|
+
{
|
|
2296
|
+
"pid": ["P1", "P2", "P3", "P4"],
|
|
2297
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
2298
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
2299
|
+
"dt": ["2024-01-01"] * 4,
|
|
2300
|
+
"perf": [0.6, 0.4, 0.7, 0.3],
|
|
2301
|
+
"pw": [1.0, 1.0, 1.0, 1.0],
|
|
2302
|
+
}
|
|
2303
|
+
)
|
|
2304
|
+
|
|
2305
|
+
# Without specifying playing time columns (backward compatible)
|
|
2306
|
+
gen = PlayerRatingGenerator(
|
|
2307
|
+
performance_column="perf",
|
|
2308
|
+
column_names=base_cn, # No playing time columns specified
|
|
2309
|
+
auto_scale_performance=True,
|
|
2310
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
2311
|
+
)
|
|
2312
|
+
|
|
2313
|
+
result = gen.fit_transform(df)
|
|
2314
|
+
|
|
2315
|
+
# Should work normally
|
|
2316
|
+
assert len(result) == 4
|
|
2317
|
+
assert "player_off_rating_perf" in result.columns
|
|
2318
|
+
|
|
2319
|
+
# Ratings should be updated normally
|
|
2320
|
+
assert gen._player_off_ratings["P1"].rating_value != 1000.0
|
|
2321
|
+
assert gen._player_off_ratings["P3"].rating_value > gen._player_off_ratings["P4"].rating_value
|
|
File without changes
|
|
File without changes
|
|
File without changes
|