spforge 0.8.23__py3-none-any.whl → 0.8.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

@@ -176,6 +176,8 @@ class LagGenerator(FeatureGenerator):
176
176
  if additional_cols:
177
177
  cols.extend(additional_cols)
178
178
 
179
+ cols = list(dict.fromkeys(cols))
180
+
179
181
  if self._df is None:
180
182
  self._df = df.select(cols)
181
183
  else:
@@ -330,7 +330,21 @@ class PlayerRatingGenerator(RatingGenerator):
330
330
  df = df.drop(cols_to_drop)
331
331
  return df
332
332
 
333
+ def _validate_playing_time_columns(self, df: pl.DataFrame) -> None:
334
+ cn = self.column_names
335
+ if cn.team_players_playing_time and cn.team_players_playing_time not in df.columns:
336
+ raise ValueError(
337
+ f"team_players_playing_time column '{cn.team_players_playing_time}' "
338
+ f"not found in DataFrame. Available columns: {list(df.columns)}"
339
+ )
340
+ if cn.opponent_players_playing_time and cn.opponent_players_playing_time not in df.columns:
341
+ raise ValueError(
342
+ f"opponent_players_playing_time column '{cn.opponent_players_playing_time}' "
343
+ f"not found in DataFrame. Available columns: {list(df.columns)}"
344
+ )
345
+
333
346
  def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
347
+ self._validate_playing_time_columns(df)
334
348
  df = self._scale_participation_weight_columns(df)
335
349
  match_df = self._create_match_df(df)
336
350
  ratings = self._calculate_ratings(match_df)
@@ -359,6 +373,7 @@ class PlayerRatingGenerator(RatingGenerator):
359
373
  return self._remove_internal_scaled_columns(result)
360
374
 
361
375
  def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
376
+ self._validate_playing_time_columns(df)
362
377
  df = self._scale_participation_weight_columns(df)
363
378
  match_df = self._create_match_df(df)
364
379
  ratings = self._calculate_future_ratings(match_df)
@@ -466,10 +481,14 @@ class PlayerRatingGenerator(RatingGenerator):
466
481
  pred_off = self._performance_predictor.predict_performance(
467
482
  player_rating=pre_player,
468
483
  opponent_team_rating=PreMatchTeamRating(
469
- id=team2, players=[], rating_value=team2_def_rating
484
+ id=team2,
485
+ players=c2.pre_match_player_ratings,
486
+ rating_value=team2_def_rating,
470
487
  ),
471
488
  team_rating=PreMatchTeamRating(
472
- id=team1, players=[], rating_value=team1_off_rating
489
+ id=team1,
490
+ players=c1.pre_match_player_ratings,
491
+ rating_value=team1_off_rating,
473
492
  ),
474
493
  )
475
494
 
@@ -484,10 +503,14 @@ class PlayerRatingGenerator(RatingGenerator):
484
503
  other=getattr(pre_player, "other", None),
485
504
  ),
486
505
  opponent_team_rating=PreMatchTeamRating(
487
- id=team2, players=[], rating_value=team2_off_rating
506
+ id=team2,
507
+ players=c2.pre_match_player_ratings,
508
+ rating_value=team2_off_rating,
488
509
  ),
489
510
  team_rating=PreMatchTeamRating(
490
- id=team1, players=[], rating_value=team1_def_rating
511
+ id=team1,
512
+ players=c1.pre_match_player_ratings,
513
+ rating_value=team1_def_rating,
491
514
  ),
492
515
  )
493
516
 
@@ -551,10 +574,14 @@ class PlayerRatingGenerator(RatingGenerator):
551
574
  pred_off = self._performance_predictor.predict_performance(
552
575
  player_rating=pre_player,
553
576
  opponent_team_rating=PreMatchTeamRating(
554
- id=team1, players=[], rating_value=team1_def_rating
577
+ id=team1,
578
+ players=c1.pre_match_player_ratings,
579
+ rating_value=team1_def_rating,
555
580
  ),
556
581
  team_rating=PreMatchTeamRating(
557
- id=team2, players=[], rating_value=team2_off_rating
582
+ id=team2,
583
+ players=c2.pre_match_player_ratings,
584
+ rating_value=team2_off_rating,
558
585
  ),
559
586
  )
560
587
 
@@ -569,10 +596,14 @@ class PlayerRatingGenerator(RatingGenerator):
569
596
  other=getattr(pre_player, "other", None),
570
597
  ),
571
598
  opponent_team_rating=PreMatchTeamRating(
572
- id=team1, players=[], rating_value=team1_off_rating
599
+ id=team1,
600
+ players=c1.pre_match_player_ratings,
601
+ rating_value=team1_off_rating,
573
602
  ),
574
603
  team_rating=PreMatchTeamRating(
575
- id=team2, players=[], rating_value=team2_def_rating
604
+ id=team2,
605
+ players=c2.pre_match_player_ratings,
606
+ rating_value=team2_def_rating,
576
607
  ),
577
608
  )
578
609
 
@@ -881,6 +912,12 @@ class PlayerRatingGenerator(RatingGenerator):
881
912
  if cn.league and cn.league in df.columns:
882
913
  player_stat_cols.append(cn.league)
883
914
 
915
+ if cn.team_players_playing_time and cn.team_players_playing_time in df.columns:
916
+ player_stat_cols.append(cn.team_players_playing_time)
917
+
918
+ if cn.opponent_players_playing_time and cn.opponent_players_playing_time in df.columns:
919
+ player_stat_cols.append(cn.opponent_players_playing_time)
920
+
884
921
  df = df.with_columns(pl.struct(player_stat_cols).alias(PLAYER_STATS))
885
922
 
886
923
  group_cols = [cn.match_id, cn.team_id, cn.start_date]
@@ -957,10 +994,24 @@ class PlayerRatingGenerator(RatingGenerator):
957
994
  else None
958
995
  )
959
996
 
997
+ team_playing_time = None
998
+ opponent_playing_time = None
999
+ if cn.team_players_playing_time:
1000
+ raw_value = team_player.get(cn.team_players_playing_time)
1001
+ if raw_value is not None:
1002
+ team_playing_time = raw_value
1003
+
1004
+ if cn.opponent_players_playing_time:
1005
+ raw_value = team_player.get(cn.opponent_players_playing_time)
1006
+ if raw_value is not None:
1007
+ opponent_playing_time = raw_value
1008
+
960
1009
  mp = MatchPerformance(
961
1010
  performance_value=perf_val,
962
1011
  projected_participation_weight=projected_participation_weight,
963
1012
  participation_weight=participation_weight,
1013
+ team_players_playing_time=team_playing_time,
1014
+ opponent_players_playing_time=opponent_playing_time,
964
1015
  )
965
1016
 
966
1017
  if player_id in self._player_off_ratings and player_id in self._player_def_ratings:
@@ -1194,10 +1245,23 @@ class PlayerRatingGenerator(RatingGenerator):
1194
1245
  ppw = pw
1195
1246
  proj_w.append(float(ppw))
1196
1247
 
1248
+ team_playing_time = None
1249
+ opponent_playing_time = None
1250
+ if cn.team_players_playing_time:
1251
+ raw_value = tp.get(cn.team_players_playing_time)
1252
+ if raw_value is not None:
1253
+ team_playing_time = raw_value
1254
+ if cn.opponent_players_playing_time:
1255
+ raw_value = tp.get(cn.opponent_players_playing_time)
1256
+ if raw_value is not None:
1257
+ opponent_playing_time = raw_value
1258
+
1197
1259
  mp = MatchPerformance(
1198
1260
  performance_value=get_perf_value(tp),
1199
1261
  projected_participation_weight=ppw,
1200
1262
  participation_weight=pw,
1263
+ team_players_playing_time=team_playing_time,
1264
+ opponent_players_playing_time=opponent_playing_time,
1201
1265
  )
1202
1266
 
1203
1267
  ensure_new_player(pid, day_number, mp, league, position, pre_list) # noqa: B023
@@ -1250,10 +1314,10 @@ class PlayerRatingGenerator(RatingGenerator):
1250
1314
  pred_off = self._performance_predictor.predict_performance(
1251
1315
  player_rating=pre,
1252
1316
  opponent_team_rating=PreMatchTeamRating(
1253
- id=team2, players=[], rating_value=t2_def_rating
1317
+ id=team2, players=t2_pre, rating_value=t2_def_rating
1254
1318
  ),
1255
1319
  team_rating=PreMatchTeamRating(
1256
- id=team1, players=[], rating_value=t1_off_rating
1320
+ id=team1, players=t1_pre, rating_value=t1_off_rating
1257
1321
  ),
1258
1322
  )
1259
1323
 
@@ -1267,10 +1331,10 @@ class PlayerRatingGenerator(RatingGenerator):
1267
1331
  position=pre.position,
1268
1332
  ),
1269
1333
  opponent_team_rating=PreMatchTeamRating(
1270
- id=team2, players=[], rating_value=t2_off_rating
1334
+ id=team2, players=t2_pre, rating_value=t2_off_rating
1271
1335
  ),
1272
1336
  team_rating=PreMatchTeamRating(
1273
- id=team1, players=[], rating_value=t1_def_rating
1337
+ id=team1, players=t1_pre, rating_value=t1_def_rating
1274
1338
  ),
1275
1339
  )
1276
1340
 
@@ -1295,10 +1359,10 @@ class PlayerRatingGenerator(RatingGenerator):
1295
1359
  pred_off = self._performance_predictor.predict_performance(
1296
1360
  player_rating=pre,
1297
1361
  opponent_team_rating=PreMatchTeamRating(
1298
- id=team1, players=[], rating_value=t1_def_rating
1362
+ id=team1, players=t1_pre, rating_value=t1_def_rating
1299
1363
  ),
1300
1364
  team_rating=PreMatchTeamRating(
1301
- id=team2, players=[], rating_value=t2_off_rating
1365
+ id=team2, players=t2_pre, rating_value=t2_off_rating
1302
1366
  ),
1303
1367
  )
1304
1368
 
@@ -1312,10 +1376,10 @@ class PlayerRatingGenerator(RatingGenerator):
1312
1376
  position=pre.position,
1313
1377
  ),
1314
1378
  opponent_team_rating=PreMatchTeamRating(
1315
- id=team1, players=[], rating_value=t1_off_rating
1379
+ id=team1, players=t1_pre, rating_value=t1_off_rating
1316
1380
  ),
1317
1381
  team_rating=PreMatchTeamRating(
1318
- id=team2, players=[], rating_value=t2_def_rating
1382
+ id=team2, players=t2_pre, rating_value=t2_def_rating
1319
1383
  ),
1320
1384
  )
1321
1385
 
@@ -133,7 +133,7 @@ class RatingPlayerDifferencePerformancePredictor(PlayerPerformancePredictor):
133
133
  team_rating_value = team_rating.rating_value
134
134
 
135
135
  if player_rating.match_performance.opponent_players_playing_time and isinstance(
136
- player_rating.match_performance.team_players_playing_time, dict
136
+ player_rating.match_performance.opponent_players_playing_time, dict
137
137
  ):
138
138
  weight_opp_rating = 0
139
139
  sum_playing_time = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.23
3
+ Version: 0.8.25
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -34,7 +34,7 @@ spforge/estimator/_group_by_estimator.py,sha256=o-xv_PJJyWBaKv5Eo4EPbOvb9i0CuebZ
34
34
  spforge/estimator/_ordinal_classifier.py,sha256=j_dfVHeX-6eZgPwwsYbkbP6bPrKH2a5S-N8vfP5hneA,1993
35
35
  spforge/estimator/_sklearn_enhancer_estimator.py,sha256=DZ-UlmeazXPd6uEnlbVv79syZ5FPa64voUyKArtjjUs,4664
36
36
  spforge/feature_generator/__init__.py,sha256=wfLfUkC_lLOCpy7NgDytK-l3HUAuhikuQXdKCgSGbuA,556
37
- spforge/feature_generator/_base.py,sha256=8_RtsnMvc1JOwAeUmnA-WP_Za3HvlBU4jZkg0yI0M-8,16299
37
+ spforge/feature_generator/_base.py,sha256=eL0P4RRqSFaekko_RxtHKs5UXSCxdR3CG57Yvo7ryBo,16341
38
38
  spforge/feature_generator/_lag.py,sha256=Qe34y_iQ90GKlIDfXiYahRobAZB8J-BE1MCrfSPuCSY,6821
39
39
  spforge/feature_generator/_net_over_predicted.py,sha256=nngVzgLLxgOj8d9avSJCXaC_jNVOl33pWpQJB9RAKTU,2092
40
40
  spforge/feature_generator/_regressor_feature_generator.py,sha256=CM8fPbbX5A_wgT5AT0zbs3YBgsZIVKE74C9vS6V6Q4U,5043
@@ -51,12 +51,12 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
51
51
  spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
52
52
  spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
54
- spforge/ratings/_player_rating.py,sha256=zhTI6isbNXYy9xAyMt_6nlOktsk6TukDVWV7vS7G4qg,57190
54
+ spforge/ratings/_player_rating.py,sha256=zltf4utwzKQxkTA8DAPZ4LWRDlwGxoiKFaiPIo4sdNw,60323
55
55
  spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
56
56
  spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
57
57
  spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
58
58
  spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
59
- spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
59
+ spforge/ratings/player_performance_predictor.py,sha256=UPzOEbougHT6FcmOiuTa3vEM6q8FZq-SjKb0AqD0JS4,8365
60
60
  spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
61
61
  spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
62
62
  spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.23.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.25.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
75
  tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -89,12 +89,12 @@ tests/feature_generator/test_regressor_feature_generator.py,sha256=3Wfw1NbD11p2N
89
89
  tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7ASx8xVKuovDoHwK7L0-lAnzv1YQMs,5667
90
90
  tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
91
91
  tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
92
- tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
92
+ tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
93
93
  tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
94
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
95
95
  tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
96
96
  tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
97
- tests/ratings/test_player_rating_generator.py,sha256=51iWgQRBHbb2-IPeajpej9ncGDWI1eUYdWrLXaKd9Ig,72232
97
+ tests/ratings/test_player_rating_generator.py,sha256=Z66LN1-YdUHrS6dszWZf4HeENRyH8oEtu4Nlsh1MpMI,82442
98
98
  tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
99
99
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
100
100
  tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
@@ -107,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
107
107
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
108
108
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
109
109
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
110
- spforge-0.8.23.dist-info/METADATA,sha256=jlkQ3fEjfwmJ_euPrFO6OlI-hT0LMQN928wz87B1qVU,20048
111
- spforge-0.8.23.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
- spforge-0.8.23.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
- spforge-0.8.23.dist-info/RECORD,,
110
+ spforge-0.8.25.dist-info/METADATA,sha256=JwBRy1-fD-a4UzeS_DeCv9AoXfbvbI7DghMls363RQ4,20048
111
+ spforge-0.8.25.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
+ spforge-0.8.25.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
+ spforge-0.8.25.dist-info/RECORD,,
@@ -684,3 +684,39 @@ def test_rolling_mean_historical_transform_higher_granularity(column_names, use_
684
684
  }
685
685
  )
686
686
  pd.testing.assert_frame_equal(transformed_df, expected_df, check_like=True, check_dtype=False)
687
+
688
+
689
+ @pytest.mark.parametrize("df", [pd.DataFrame, pl.DataFrame])
690
+ def test_rolling_window__feature_also_used_as_column_names_field(df):
691
+ column_names = ColumnNames(
692
+ match_id="game_id",
693
+ player_id="player_id",
694
+ team_id="team_id",
695
+ start_date="game_date",
696
+ participation_weight="three_pointers_attempted",
697
+ )
698
+ data = df(
699
+ {
700
+ "game_id": [1, 1, 2, 2],
701
+ "player_id": ["a", "b", "a", "b"],
702
+ "team_id": [1, 2, 1, 2],
703
+ "game_date": [
704
+ pd.to_datetime("2023-01-01"),
705
+ pd.to_datetime("2023-01-01"),
706
+ pd.to_datetime("2023-01-02"),
707
+ pd.to_datetime("2023-01-02"),
708
+ ],
709
+ "three_pointers_attempted": [5.0, 3.0, 7.0, 4.0],
710
+ }
711
+ )
712
+
713
+ transformer = RollingWindowTransformer(
714
+ features=["three_pointers_attempted"],
715
+ window=20,
716
+ granularity=["player_id"],
717
+ )
718
+
719
+ transformed_df = transformer.fit_transform(data, column_names=column_names)
720
+
721
+ assert transformer.features_out[0] in transformed_df.columns
722
+ assert len(transformed_df) == len(data)
@@ -2039,3 +2039,283 @@ def test_fit_transform_when_all_players_have_null_performance_then_no_rating_cha
2039
2039
  f"Before={p1_off_before_m2}, After={p1_off_after_m2}. "
2040
2040
  "Null performance should result in no rating change."
2041
2041
  )
2042
+
2043
+
2044
+ # --- team_players_playing_time Tests ---
2045
+
2046
+
2047
+ def test_fit_transform_team_players_playing_time_column_not_found_raises_error(base_cn):
2048
+ """Specifying a nonexistent team_players_playing_time column should raise ValueError."""
2049
+ from dataclasses import replace
2050
+
2051
+ cn = replace(base_cn, team_players_playing_time="nonexistent_column")
2052
+
2053
+ df = pl.DataFrame(
2054
+ {
2055
+ "pid": ["P1", "P2"],
2056
+ "tid": ["T1", "T2"],
2057
+ "mid": ["M1", "M1"],
2058
+ "dt": ["2024-01-01", "2024-01-01"],
2059
+ "perf": [0.6, 0.4],
2060
+ "pw": [1.0, 1.0],
2061
+ }
2062
+ )
2063
+
2064
+ gen = PlayerRatingGenerator(
2065
+ performance_column="perf",
2066
+ column_names=cn,
2067
+ )
2068
+
2069
+ with pytest.raises(ValueError, match="team_players_playing_time column"):
2070
+ gen.fit_transform(df)
2071
+
2072
+
2073
+ def test_fit_transform_opponent_players_playing_time_column_not_found_raises_error(base_cn):
2074
+ """Specifying a nonexistent opponent_players_playing_time column should raise ValueError."""
2075
+ from dataclasses import replace
2076
+
2077
+ cn = replace(base_cn, opponent_players_playing_time="nonexistent_column")
2078
+
2079
+ df = pl.DataFrame(
2080
+ {
2081
+ "pid": ["P1", "P2"],
2082
+ "tid": ["T1", "T2"],
2083
+ "mid": ["M1", "M1"],
2084
+ "dt": ["2024-01-01", "2024-01-01"],
2085
+ "perf": [0.6, 0.4],
2086
+ "pw": [1.0, 1.0],
2087
+ }
2088
+ )
2089
+
2090
+ gen = PlayerRatingGenerator(
2091
+ performance_column="perf",
2092
+ column_names=cn,
2093
+ )
2094
+
2095
+ with pytest.raises(ValueError, match="opponent_players_playing_time column"):
2096
+ gen.fit_transform(df)
2097
+
2098
+
2099
+ def test_fit_transform_null_playing_time_uses_standard_team_rating(base_cn):
2100
+ """When team_players_playing_time is null for a row, should use standard team rating."""
2101
+ from dataclasses import replace
2102
+
2103
+ cn = replace(
2104
+ base_cn,
2105
+ team_players_playing_time="team_pt",
2106
+ opponent_players_playing_time="opp_pt",
2107
+ )
2108
+
2109
+ # First establish ratings with a normal match (no playing time data)
2110
+ df1 = pl.DataFrame(
2111
+ {
2112
+ "pid": ["P1", "P2", "P3", "P4"],
2113
+ "tid": ["T1", "T1", "T2", "T2"],
2114
+ "mid": ["M1", "M1", "M1", "M1"],
2115
+ "dt": ["2024-01-01"] * 4,
2116
+ "perf": [0.8, 0.6, 0.4, 0.2],
2117
+ "pw": [1.0, 1.0, 1.0, 1.0],
2118
+ "team_pt": [None, None, None, None],
2119
+ "opp_pt": [None, None, None, None],
2120
+ }
2121
+ )
2122
+
2123
+ gen = PlayerRatingGenerator(
2124
+ performance_column="perf",
2125
+ column_names=cn,
2126
+ auto_scale_performance=True,
2127
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
2128
+ non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
2129
+ )
2130
+
2131
+ result = gen.fit_transform(df1)
2132
+
2133
+ # Should work without error and produce predictions
2134
+ assert len(result) == 4
2135
+ assert "player_predicted_off_performance_perf" in result.columns
2136
+
2137
+ # All predictions should be valid (between 0 and 1)
2138
+ predictions = result["player_predicted_off_performance_perf"].to_list()
2139
+ for pred in predictions:
2140
+ assert 0.0 <= pred <= 1.0
2141
+
2142
+
2143
+ def test_fit_transform_weighted_calculation_with_playing_time(base_cn):
2144
+ """Test that playing time weighted calculation produces different predictions."""
2145
+ from dataclasses import replace
2146
+
2147
+ cn = replace(
2148
+ base_cn,
2149
+ team_players_playing_time="team_pt",
2150
+ opponent_players_playing_time="opp_pt",
2151
+ )
2152
+
2153
+ # First establish different ratings for players
2154
+ df1 = pl.DataFrame(
2155
+ {
2156
+ "pid": ["P1", "P2", "P3", "P4"],
2157
+ "tid": ["T1", "T1", "T2", "T2"],
2158
+ "mid": ["M1", "M1", "M1", "M1"],
2159
+ "dt": ["2024-01-01"] * 4,
2160
+ "perf": [0.9, 0.1, 0.5, 0.5], # P1 high rating, P2 low rating
2161
+ "pw": [1.0, 1.0, 1.0, 1.0],
2162
+ "team_pt": [None, None, None, None],
2163
+ "opp_pt": [None, None, None, None],
2164
+ }
2165
+ )
2166
+
2167
+ gen = PlayerRatingGenerator(
2168
+ performance_column="perf",
2169
+ column_names=cn,
2170
+ auto_scale_performance=True,
2171
+ start_harcoded_start_rating=1000.0,
2172
+ non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
2173
+ )
2174
+ gen.fit_transform(df1)
2175
+
2176
+ # Verify P1 and P2 have different ratings now
2177
+ p1_rating = gen._player_off_ratings["P1"].rating_value
2178
+ p2_rating = gen._player_off_ratings["P2"].rating_value
2179
+ assert p1_rating > p2_rating, "Setup: P1 should have higher rating than P2"
2180
+
2181
+ # Second match with playing time data
2182
+ # P3 faces opponent P1 80% of time (high rating), P4 faces P2 80% of time (low rating)
2183
+ # Use consistent schema for all dict entries (all keys present in all rows)
2184
+ df2 = pl.DataFrame(
2185
+ {
2186
+ "pid": ["P1", "P2", "P3", "P4"],
2187
+ "tid": ["T1", "T1", "T2", "T2"],
2188
+ "mid": ["M2", "M2", "M2", "M2"],
2189
+ "dt": ["2024-01-02"] * 4,
2190
+ "pw": [1.0, 1.0, 1.0, 1.0],
2191
+ # Team playing time - who they play WITH on same team
2192
+ "team_pt": [
2193
+ {"P1": 0.0, "P2": 1.0, "P3": 0.5, "P4": 0.5}, # P1 on T1, plays with P2
2194
+ {"P1": 1.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 on T1, plays with P1
2195
+ {"P1": 0.5, "P2": 0.5, "P3": 0.0, "P4": 1.0}, # P3 on T2, plays with P4
2196
+ {"P1": 0.5, "P2": 0.5, "P3": 1.0, "P4": 0.0}, # P4 on T2, plays with P3
2197
+ ],
2198
+ # Opponent playing time - who they face on opposing team
2199
+ "opp_pt": [
2200
+ {"P1": 0.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P1 faces T2 opponents evenly
2201
+ {"P1": 0.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 faces T2 opponents evenly
2202
+ {"P1": 0.8, "P2": 0.2, "P3": 0.0, "P4": 0.0}, # P3 faces P1 80% of time
2203
+ {"P1": 0.2, "P2": 0.8, "P3": 0.0, "P4": 0.0}, # P4 faces P2 80% of time
2204
+ ],
2205
+ }
2206
+ )
2207
+
2208
+ result = gen.future_transform(df2)
2209
+
2210
+ # Verify we get predictions
2211
+ assert len(result) == 4
2212
+
2213
+ # Get predictions for P3 and P4
2214
+ # P3 faces stronger opponents (mainly P1), P4 faces weaker opponents (mainly P2)
2215
+ # So P3 should have lower predicted performance than P4 (all else equal)
2216
+ p3_pred = result.filter(pl.col("pid") == "P3")["player_predicted_off_performance_perf"][0]
2217
+ p4_pred = result.filter(pl.col("pid") == "P4")["player_predicted_off_performance_perf"][0]
2218
+
2219
+ # P3 faces P1 (high rating) 80% of time, P4 faces P2 (low rating) 80% of time
2220
+ # So P4 should have higher predicted performance
2221
+ assert p4_pred > p3_pred, (
2222
+ f"P4 (facing weak opponents) should have higher prediction than P3 (facing strong opponents). "
2223
+ f"P3 pred={p3_pred:.4f}, P4 pred={p4_pred:.4f}"
2224
+ )
2225
+
2226
+
2227
+ def test_future_transform_weighted_calculation_with_playing_time(base_cn):
2228
+ """Test that future_transform correctly uses playing time weights."""
2229
+ from dataclasses import replace
2230
+
2231
+ cn = replace(
2232
+ base_cn,
2233
+ team_players_playing_time="team_pt",
2234
+ opponent_players_playing_time="opp_pt",
2235
+ )
2236
+
2237
+ # First establish ratings
2238
+ df1 = pl.DataFrame(
2239
+ {
2240
+ "pid": ["P1", "P2", "P3", "P4"],
2241
+ "tid": ["T1", "T1", "T2", "T2"],
2242
+ "mid": ["M1", "M1", "M1", "M1"],
2243
+ "dt": ["2024-01-01"] * 4,
2244
+ "perf": [0.9, 0.1, 0.5, 0.5],
2245
+ "pw": [1.0, 1.0, 1.0, 1.0],
2246
+ "team_pt": [None, None, None, None],
2247
+ "opp_pt": [None, None, None, None],
2248
+ }
2249
+ )
2250
+
2251
+ gen = PlayerRatingGenerator(
2252
+ performance_column="perf",
2253
+ column_names=cn,
2254
+ auto_scale_performance=True,
2255
+ start_harcoded_start_rating=1000.0,
2256
+ non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_OFF_PERFORMANCE],
2257
+ )
2258
+ gen.fit_transform(df1)
2259
+
2260
+ # Future match with playing time weights (consistent schema)
2261
+ future_df = pl.DataFrame(
2262
+ {
2263
+ "pid": ["P1", "P2", "P3", "P4"],
2264
+ "tid": ["T1", "T1", "T2", "T2"],
2265
+ "mid": ["M2", "M2", "M2", "M2"],
2266
+ "dt": ["2024-01-02"] * 4,
2267
+ "pw": [1.0, 1.0, 1.0, 1.0],
2268
+ "team_pt": [
2269
+ {"P1": 0.0, "P2": 1.0, "P3": 0.5, "P4": 0.5}, # P1 plays with P2
2270
+ {"P1": 1.0, "P2": 0.0, "P3": 0.5, "P4": 0.5}, # P2 plays with P1
2271
+ {"P1": 0.5, "P2": 0.5, "P3": 0.0, "P4": 1.0}, # P3 plays with P4
2272
+ {"P1": 0.5, "P2": 0.5, "P3": 1.0, "P4": 0.0}, # P4 plays with P3
2273
+ ],
2274
+ "opp_pt": [
2275
+ {"P1": 0.0, "P2": 0.0, "P3": 1.0, "P4": 0.0}, # P1 faces only P3
2276
+ {"P1": 0.0, "P2": 0.0, "P3": 0.0, "P4": 1.0}, # P2 faces only P4
2277
+ {"P1": 1.0, "P2": 0.0, "P3": 0.0, "P4": 0.0}, # P3 faces only P1
2278
+ {"P1": 0.0, "P2": 1.0, "P3": 0.0, "P4": 0.0}, # P4 faces only P2
2279
+ ],
2280
+ }
2281
+ )
2282
+
2283
+ result = gen.future_transform(future_df)
2284
+
2285
+ # Verify predictions are valid
2286
+ assert len(result) == 4
2287
+ predictions = result["player_predicted_off_performance_perf"].to_list()
2288
+ for pred in predictions:
2289
+ assert 0.0 <= pred <= 1.0
2290
+
2291
+
2292
+ def test_fit_transform_backward_compatible_without_playing_time_columns(base_cn):
2293
+ """Behavior should be unchanged when team_players_playing_time columns are not specified."""
2294
+ df = pl.DataFrame(
2295
+ {
2296
+ "pid": ["P1", "P2", "P3", "P4"],
2297
+ "tid": ["T1", "T1", "T2", "T2"],
2298
+ "mid": ["M1", "M1", "M1", "M1"],
2299
+ "dt": ["2024-01-01"] * 4,
2300
+ "perf": [0.6, 0.4, 0.7, 0.3],
2301
+ "pw": [1.0, 1.0, 1.0, 1.0],
2302
+ }
2303
+ )
2304
+
2305
+ # Without specifying playing time columns (backward compatible)
2306
+ gen = PlayerRatingGenerator(
2307
+ performance_column="perf",
2308
+ column_names=base_cn, # No playing time columns specified
2309
+ auto_scale_performance=True,
2310
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
2311
+ )
2312
+
2313
+ result = gen.fit_transform(df)
2314
+
2315
+ # Should work normally
2316
+ assert len(result) == 4
2317
+ assert "player_off_rating_perf" in result.columns
2318
+
2319
+ # Ratings should be updated normally
2320
+ assert gen._player_off_ratings["P1"].rating_value != 1000.0
2321
+ assert gen._player_off_ratings["P3"].rating_value > gen._player_off_ratings["P4"].rating_value