spforge 0.8.15__py3-none-any.whl → 0.8.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of spforge might be problematic.
- spforge/ratings/_player_rating.py +30 -8
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/scorer/_score.py +42 -11
- {spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/METADATA +1 -1
- {spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/RECORD +11 -11
- tests/ratings/test_player_rating_generator.py +70 -0
- tests/scorer/test_score.py +142 -0
- {spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/WHEEL +0 -0
- {spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/top_level.txt +0 -0

spforge/ratings/_player_rating.py
CHANGED

@@ -16,6 +16,7 @@ from spforge.data_structures import (
     MatchPerformance,
     MatchPlayer,
     PlayerRating,
+    PlayerRatingChange,
     PlayerRatingsResult,
     PreMatchPlayerRating,
     PreMatchPlayersCollection,
@@ -78,7 +79,7 @@ class PlayerRatingGenerator(RatingGenerator):
         start_min_count_for_percentiles: int = 50,
         start_team_rating_subtract: float = 80,
         start_team_weight: float = 0,
-        start_max_days_ago_league_entities: int =
+        start_max_days_ago_league_entities: int = 600,
         start_min_match_count_team_rating: int = 2,
         start_harcoded_start_rating: float | None = None,
         column_names: ColumnNames | None = None,
@@ -442,9 +443,9 @@ class PlayerRatingGenerator(RatingGenerator):
         team1_off_rating, team1_def_rating = self._team_off_def_rating_from_collection(c1)
         team2_off_rating, team2_def_rating = self._team_off_def_rating_from_collection(c2)

-        player_updates: list[
-            []
-
+        player_updates: list[
+            tuple[str, str, float, float, float, float, float, float, int, str | None]
+        ] = []

         for pre_player in c1.pre_match_player_ratings:
             pid = pre_player.id
@@ -520,6 +521,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )

@@ -597,6 +599,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )

@@ -611,6 +614,7 @@ class PlayerRatingGenerator(RatingGenerator):
             _off_change,
             _def_change,
             _dn,
+            _league,
         ) in player_updates:
             out[cn.player_id].append(pid)
             out[cn.match_id].append(match_id)
@@ -627,15 +631,18 @@ class PlayerRatingGenerator(RatingGenerator):
         for (
             pid,
             team_id,
-
+            off_pre,
             _def_pre,
             _pred_off,
             _pred_def,
             off_change,
             def_change,
             dn,
+            league,
         ) in player_updates:
-            pending_team_updates.append(
+            pending_team_updates.append(
+                (pid, team_id, off_pre, off_change, def_change, dn, league)
+            )

         if last_update_id is None:
             last_update_id = update_id
@@ -645,9 +652,11 @@ class PlayerRatingGenerator(RatingGenerator):

         return pl.DataFrame(out, strict=False)

-    def _apply_player_updates(
+    def _apply_player_updates(
+        self, updates: list[tuple[str, str, float, float, float, int, str | None]]
+    ) -> None:

-        for player_id, team_id, off_change, def_change, day_number in updates:
+        for player_id, team_id, pre_rating, off_change, def_change, day_number, league in updates:
             off_state = self._player_off_ratings[player_id]
             off_state.confidence_sum = self._calculate_post_match_confidence_sum(
                 entity_rating=off_state,
@@ -670,6 +679,19 @@ class PlayerRatingGenerator(RatingGenerator):
             def_state.last_match_day_number = int(day_number)
             def_state.most_recent_team_id = team_id

+            self.start_rating_generator.update_players_to_leagues(
+                PlayerRatingChange(
+                    id=player_id,
+                    day_number=day_number,
+                    league=league,
+                    participation_weight=1.0,
+                    predicted_performance=0.0,
+                    performance=0.0,
+                    pre_match_rating_value=pre_rating,
+                    rating_change_value=off_change,
+                )
+            )
+
     def _add_rating_features(self, df: pl.DataFrame) -> pl.DataFrame:
         cols_to_add = set((self._features_out or []) + (self.non_predictor_features_out or []))

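Taken together, the _player_rating.py hunks thread each player's league and pre-match rating through the update pipeline so that _apply_player_updates can report every rating change to the start rating generator via update_players_to_leagues, populating _league_player_ratings, the pool that quantile-based start ratings draw from. A minimal sketch of that mechanism; the class and method bodies below are hypothetical and only mirror what the diff and the new test imply, not spforge's actual implementation:

    import numpy as np
    from collections import defaultdict

    class StartRatingSketch:
        """Toy quantile-based start ratings, mirroring what the diff implies."""

        def __init__(self, league_quantile=0.05, min_count_for_percentiles=50, default=1000.0):
            self.league_quantile = league_quantile
            self.min_count_for_percentiles = min_count_for_percentiles
            self.default = default
            self._league_player_ratings = defaultdict(list)  # league -> rating samples

        def update_players_to_leagues(self, league, post_match_rating):
            # The call this release restores: record every post-match rating.
            self._league_player_ratings[league].append(post_match_rating)

        def start_rating(self, league):
            ratings = self._league_player_ratings.get(league, [])
            if len(ratings) < self.min_count_for_percentiles:
                return self.default  # not enough samples for a percentile yet
            return float(np.percentile(ratings, self.league_quantile * 100))

    sketch = StartRatingSketch()
    for r in np.random.default_rng(0).normal(1000.0, 40.0, size=60):
        sketch.update_players_to_leagues(None, float(r))
    print(sketch.start_rating(None))  # roughly the 5th percentile, not the 1000 default

Without the restored call, _league_player_ratings stays empty and start_rating always falls back to the default, which is exactly the behavior the new regression test below pins down.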
spforge/ratings/start_rating_generator.py
CHANGED

@@ -28,7 +28,7 @@ class StartRatingGenerator:
         min_count_for_percentiles: int = 50,
         team_rating_subtract: float = 80,
         team_weight: float = 0,
-        max_days_ago_league_entities: int =
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):
spforge/ratings/team_start_rating_generator.py
CHANGED

@@ -24,7 +24,7 @@ class TeamStartRatingGenerator:
         league_ratings: dict[str, float] | None = None,
         league_quantile: float = 0.2,
         min_count_for_percentiles: int = 50,
-        max_days_ago_league_entities: int =
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):
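Both generator hunks above set the default for max_days_ago_league_entities to 600 (the previous value is cut off in this diff view). Judging by the name, the parameter bounds how long ago a league entity's last match may be before it stops contributing to league statistics; a hypothetical illustration of such a recency filter, since the parameter's real use inside spforge is not shown in this diff:

    from dataclasses import dataclass

    @dataclass
    class LeagueEntity:
        rating: float
        last_match_day_number: int

    def recent_entities(entities, current_day, max_days_ago_league_entities=600):
        # Keep only entities whose last match falls inside the recency window.
        return [
            e for e in entities
            if current_day - e.last_match_day_number <= max_days_ago_league_entities
        ]

    pool = [LeagueEntity(1000.0, 900), LeagueEntity(950.0, 100)]
    print(recent_entities(pool, current_day=1000))  # the day-100 entity is dropped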
spforge/scorer/_score.py
CHANGED

@@ -366,18 +366,49 @@ class PWMSE(BaseScorer):
         self.labels = labels
         self.evaluation_labels = evaluation_labels

+        self._needs_extension = False
+        self._needs_slicing = False
         self._eval_indices: list[int] | None = None
+        self._extension_mapping: dict[int, int] | None = None
+
         if self.evaluation_labels is not None and self.labels is not None:
-
-
+            training_set = set(self.labels)
+            eval_set = set(self.evaluation_labels)
+
+            if eval_set <= training_set:
+                self._needs_slicing = True
+                label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
+                self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
+            elif training_set <= eval_set:
+                self._needs_extension = True
+                eval_label_to_idx = {lbl: i for i, lbl in enumerate(self.evaluation_labels)}
+                self._extension_mapping = {
+                    train_idx: eval_label_to_idx[lbl]
+                    for train_idx, lbl in enumerate(self.labels)
+                }
+            else:
+                raise ValueError(
+                    f"evaluation_labels must be a subset or superset of labels. "
+                    f"labels={self.labels}, evaluation_labels={self.evaluation_labels}"
+                )
+
+    def _align_predictions(self, preds: np.ndarray) -> np.ndarray:
+        if self._needs_slicing and self._eval_indices is not None:
+            sliced = preds[:, self._eval_indices]
+            row_sums = sliced.sum(axis=1, keepdims=True)
+            row_sums = np.where(row_sums == 0, 1.0, row_sums)
+            return sliced / row_sums
+
+        if self._needs_extension and self._extension_mapping is not None:
+            n_samples = preds.shape[0]
+            n_eval_labels = len(self.evaluation_labels)
+            extended = np.full((n_samples, n_eval_labels), 1e-5, dtype=np.float64)
+            for train_idx, eval_idx in self._extension_mapping.items():
+                extended[:, eval_idx] = preds[:, train_idx]
+            row_sums = extended.sum(axis=1, keepdims=True)
+            return extended / row_sums

-
-        if self._eval_indices is None:
-            return preds
-        sliced = preds[:, self._eval_indices]
-        row_sums = sliced.sum(axis=1, keepdims=True)
-        row_sums = np.where(row_sums == 0, 1.0, row_sums)
-        return sliced / row_sums
+        return preds

     def _get_scoring_labels(self) -> list[int]:
         if self.evaluation_labels is not None:

@@ -446,7 +477,7 @@ class PWMSE(BaseScorer):

         targets = gran_df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self.
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(

@@ -464,7 +495,7 @@ class PWMSE(BaseScorer):

         targets = df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self.
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(
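The rewritten _align_predictions above covers two cases: when evaluation_labels is a subset of labels, prediction columns are sliced out and renormalized; when it is a superset, the missing columns are padded with a small epsilon (1e-5) and each row is renormalized. The superset path, restated as a standalone snippet with the same arithmetic as the diff, outside the class:

    import numpy as np

    labels = [0, 1, 2]
    evaluation_labels = [-1, 0, 1, 2, 3]
    preds = np.array([[0.3, 0.5, 0.2]])

    # Map each training-label column to its position among the evaluation labels.
    eval_label_to_idx = {lbl: i for i, lbl in enumerate(evaluation_labels)}
    extension_mapping = {ti: eval_label_to_idx[lbl] for ti, lbl in enumerate(labels)}

    extended = np.full((preds.shape[0], len(evaluation_labels)), 1e-5)
    for train_idx, eval_idx in extension_mapping.items():
        extended[:, eval_idx] = preds[:, train_idx]
    extended /= extended.sum(axis=1, keepdims=True)  # renormalize each row

    print(extended.round(4))  # ~[[0. 0.3 0.5 0.2 0.]] apart from the 1e-5 padding

The epsilon keeps every evaluation label's probability strictly positive, and renormalizing keeps each row a valid distribution; before this change, a superset of labels raised an indexing problem or fell through to the slicing path.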
{spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.15
+Version: 0.8.17
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
{spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/RECORD
CHANGED

@@ -51,18 +51,18 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
 spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
 spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
 spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
-spforge/ratings/_player_rating.py,sha256
+spforge/ratings/_player_rating.py,sha256=JSTXdaRw_b8ZoZxgmMnZrYG7gPg8GKawqalLd16SK1M,56066
 spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
 spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
 spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
 spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
 spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
-spforge/ratings/start_rating_generator.py,sha256=
+spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
 spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
-spforge/ratings/team_start_rating_generator.py,sha256=
+spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
 spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
-spforge/scorer/_score.py,sha256=
+spforge/scorer/_score.py,sha256=kNuqiK3F5mUEAVD7KjWYY7E_AkRrspR362QBm_jyElg,57623
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
 spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
 spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321

@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.
+spforge-0.8.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=g5SMTTolfRikHZfwIkExuoRjh-ldcr9-F-E1PUNpbpM,22923
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020

@@ -94,12 +94,12 @@ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGF
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
 tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
 tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
-tests/ratings/test_player_rating_generator.py,sha256=
+tests/ratings/test_player_rating_generator.py,sha256=SKLaBQBsHYslc2Nia2AxZ8A9Cy16MbZAWjLyOjvcMnA,64094
 tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
 tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
-tests/scorer/test_score.py,sha256=
+tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
 tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363

@@ -107,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
+spforge-0.8.17.dist-info/METADATA,sha256=Zc4fLlCtPWuEFvs0DVRZre9OtTvRyVMgdmGV7-s68Ao,20048
+spforge-0.8.17.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.17.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.17.dist-info/RECORD,,
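For context on the RECORD entries above: each row has the form path,sha256=<digest>,<size>, where the digest is the URL-safe base64-encoded SHA-256 of the file contents with trailing = padding stripped, per the wheel/RECORD spec (PEP 376/427). A minimal sketch of computing such a row (the path and contents here are illustrative):

    import base64
    import hashlib

    def record_row(path: str, data: bytes) -> str:
        # URL-safe base64 SHA-256 with '=' padding stripped, as wheel RECORDs use.
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"

    print(record_row("spforge/ratings/utils.py", b"print('hello')\n"))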
tests/ratings/test_player_rating_generator.py
CHANGED

@@ -1746,3 +1746,73 @@ def test_fit_transform__player_rating_difference_from_team_projected_feature(bas
     for row in result.iter_rows(named=True):
         expected = row[player_col] - row[team_col]
         assert row[diff_col] == pytest.approx(expected, rel=1e-9)
+
+
+def test_fit_transform__start_league_quantile_uses_existing_player_ratings(base_cn):
+    """
+    Bug reproduction: start_league_quantile should use the percentile of existing
+    player ratings for new players, but update_players_to_leagues is never called, so
+    _league_player_ratings stays empty and all new players get the default rating.
+
+    Expected: new player P_NEW starts at the 5th percentile of existing ratings (~920).
+    Actual: the new player starts at the default 1000 because _league_player_ratings is empty.
+    """
+    import numpy as np
+
+    num_existing_players = 60
+    player_ids = [f"P{i}" for i in range(num_existing_players)]
+    team_ids = [f"T{i % 2 + 1}" for i in range(num_existing_players)]
+
+    df1 = pl.DataFrame(
+        {
+            "pid": player_ids,
+            "tid": team_ids,
+            "mid": ["M1"] * num_existing_players,
+            "dt": ["2024-01-01"] * num_existing_players,
+            "perf": [0.3 + (i % 10) * 0.07 for i in range(num_existing_players)],
+            "pw": [1.0] * num_existing_players,
+        }
+    )
+
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        auto_scale_performance=True,
+        start_league_quantile=0.05,
+        start_min_count_for_percentiles=50,
+        features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
+    )
+    gen.fit_transform(df1)
+
+    existing_ratings = [
+        gen._player_off_ratings[pid].rating_value for pid in player_ids
+    ]
+    expected_quantile_rating = np.percentile(existing_ratings, 5)
+
+    srg = gen.start_rating_generator
+    assert len(srg._league_player_ratings.get(None, [])) >= 50, (
+        f"Expected _league_player_ratings to have >=50 entries but got "
+        f"{len(srg._league_player_ratings.get(None, []))}. "
+        "update_players_to_leagues is never called."
+    )
+
+    df2 = pl.DataFrame(
+        {
+            "pid": ["P_NEW", "P0"],
+            "tid": ["T1", "T2"],
+            "mid": ["M2", "M2"],
+            "dt": ["2024-01-02", "2024-01-02"],
+            "pw": [1.0, 1.0],
+        }
+    )
+    result = gen.future_transform(df2)
+
+    new_player_start_rating = result.filter(pl.col("pid") == "P_NEW")[
+        "player_off_rating_perf"
+    ][0]
+
+    assert new_player_start_rating == pytest.approx(expected_quantile_rating, rel=0.1), (
+        f"New player should start at 5th percentile ({expected_quantile_rating:.1f}) "
+        f"but got {new_player_start_rating:.1f}. "
+        "start_league_quantile has no effect because update_players_to_leagues is never called."
+    )
tests/scorer/test_score.py
CHANGED

@@ -2138,3 +2138,145 @@ def test_scorers_respect_validation_column(scorer_factory, df_factory):
     score_all = scorer_factory().score(df)
     score_valid = scorer_factory().score(df_valid)
     assert score_all == score_valid
+
+
+# ============================================================================
+# PWMSE evaluation_labels Extension Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_extends_predictions(df_type):
+    """PWMSE with evaluation_labels as a superset extends predictions with small probs."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.3, 0.5, 0.2],
+                [0.2, 0.6, 0.2],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[-1, 0, 1, 2, 3],
+    )
+    score = scorer.score(df)
+
+    n_eval_labels = 5
+    eps = 1e-5
+    preds_original = np.array([[0.3, 0.5, 0.2], [0.2, 0.6, 0.2]])
+    extended = np.full((2, n_eval_labels), eps, dtype=np.float64)
+    extended[:, 1] = preds_original[:, 0]
+    extended[:, 2] = preds_original[:, 1]
+    extended[:, 3] = preds_original[:, 2]
+    row_sums = extended.sum(axis=1, keepdims=True)
+    preds_renorm = extended / row_sums
+
+    eval_labels = np.array([-1, 0, 1, 2, 3], dtype=np.float64)
+    targets = np.array([0, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_exact_match(df_type):
+    """PWMSE with evaluation_labels identical to labels (no-op)."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.3, 0.5, 0.2],
+                [0.2, 0.6, 0.2],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    scorer_with_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[0, 1, 2],
+    )
+    scorer_without_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+    )
+
+    score_with = scorer_with_eval.score(df)
+    score_without = scorer_without_eval.score(df)
+
+    assert abs(score_with - score_without) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_partial_overlap_raises(df_type):
+    """PWMSE with partial overlap between labels and evaluation_labels raises."""
+    with pytest.raises(ValueError, match="evaluation_labels must be a subset or superset"):
+        PWMSE(
+            pred_column="pred",
+            target="target",
+            labels=[0, 1, 2],
+            evaluation_labels=[1, 2, 3],
+        )
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_extends_with_compare_to_naive(df_type):
+    """PWMSE extension mode works correctly with compare_to_naive."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.8, 0.15, 0.05],
+                [0.1, 0.7, 0.2],
+                [0.05, 0.15, 0.8],
+                [0.3, 0.4, 0.3],
+            ],
+            "target": [0, 1, 2, 1],
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[-1, 0, 1, 2, 3],
+        compare_to_naive=True,
+    )
+    score = scorer.score(df)
+
+    n_eval_labels = 5
+    eps = 1e-5
+    preds_original = np.array([
+        [0.8, 0.15, 0.05],
+        [0.1, 0.7, 0.2],
+        [0.05, 0.15, 0.8],
+        [0.3, 0.4, 0.3],
+    ])
+    extended = np.full((4, n_eval_labels), eps, dtype=np.float64)
+    extended[:, 1] = preds_original[:, 0]
+    extended[:, 2] = preds_original[:, 1]
+    extended[:, 3] = preds_original[:, 2]
+    row_sums = extended.sum(axis=1, keepdims=True)
+    preds_renorm = extended / row_sums
+
+    eval_labels = np.array([-1, 0, 1, 2, 3], dtype=np.float64)
+    targets = np.array([0, 1, 2, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    model_score = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    naive_probs = np.array([0.0, 0.25, 0.5, 0.25, 0.0])
+    naive_preds = np.tile(naive_probs, (4, 1))
+    naive_score = float((diffs_sqd * naive_preds).sum(axis=1).mean())
+
+    expected = naive_score - model_score
+    assert abs(score - expected) < 1e-10
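The expected values in these tests all follow the same probability-weighted MSE: per row, take the squared distance from each candidate label to the target, weight it by the predicted probability of that label, then average across rows. A compact restatement of that arithmetic as a hypothetical helper, matching the tests' expected-score computation rather than spforge's internal _pwmse_score:

    import numpy as np

    def pwmse(targets, preds, labels):
        # E_p[(label - target)^2] per row, averaged over rows.
        labels = np.asarray(labels, dtype=np.float64)
        diffs_sqd = (labels[None, :] - targets[:, None]) ** 2
        return float((diffs_sqd * preds).sum(axis=1).mean())

    targets = np.array([0.0, 1.0])
    preds = np.array([[0.3, 0.5, 0.2], [0.2, 0.6, 0.2]])
    print(pwmse(targets, preds, [0, 1, 2]))  # 0.85

With compare_to_naive=True, the reported score is the naive baseline's PWMSE minus the model's, so larger values mean a bigger improvement over the naive label distribution.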
{spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/WHEEL
File without changes

{spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/licenses/LICENSE
File without changes

{spforge-0.8.15.dist-info → spforge-0.8.17.dist-info}/top_level.txt
File without changes