spforge 0.8.25__py3-none-any.whl → 0.8.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic.
- spforge/ratings/_player_rating.py +57 -24
- spforge/scorer/_score.py +114 -50
- {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/METADATA +1 -1
- {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/RECORD +9 -9
- tests/ratings/test_player_rating_generator.py +34 -1
- tests/scorer/test_score_aggregation_granularity.py +69 -0
- {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/WHEEL +0 -0
- {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/top_level.txt +0 -0
spforge/ratings/_player_rating.py
CHANGED

@@ -2,8 +2,10 @@
 from __future__ import annotations
 
 import copy
-import …
+import json
 import logging
+import math
+from collections.abc import Mapping
 from typing import Any, Literal
 
 import narwhals.stable.v2 as nw
@@ -912,10 +914,16 @@ class PlayerRatingGenerator(RatingGenerator):
         if cn.league and cn.league in df.columns:
             player_stat_cols.append(cn.league)
 
-        if …
+        if (
+            cn.team_players_playing_time
+            and cn.team_players_playing_time in df.columns
+        ):
             player_stat_cols.append(cn.team_players_playing_time)
 
-        if …
+        if (
+            cn.opponent_players_playing_time
+            and cn.opponent_players_playing_time in df.columns
+        ):
             player_stat_cols.append(cn.opponent_players_playing_time)
 
         df = df.with_columns(pl.struct(player_stat_cols).alias(PLAYER_STATS))
@@ -948,6 +956,40 @@ class PlayerRatingGenerator(RatingGenerator):
         match_df = self._add_day_number(match_df, cn.start_date, "__day_number")
         return match_df
 
+    def _get_players_playing_time(
+        self, source: Mapping[str, Any], column_name: str | None
+    ) -> dict[str, float] | None:
+        if not column_name:
+            return None
+        return self._normalize_players_playing_time(source.get(column_name))
+
+    @staticmethod
+    def _normalize_players_playing_time(raw_value: Any) -> dict[str, float] | None:
+        if raw_value is None:
+            return None
+
+        if isinstance(raw_value, str):
+            raw_text = raw_value
+            raw_value = raw_value.strip()
+            if not raw_value:
+                return None
+            try:
+                raw_value = json.loads(raw_value)
+            except json.JSONDecodeError as exc:
+                raise ValueError(
+                    f"unable to parse playing time JSON {raw_text!r}: {exc}"
+                ) from exc
+
+        if isinstance(raw_value, Mapping):
+            normalized: dict[str, float] = {}
+            for key, value in raw_value.items():
+                if value is None:
+                    continue
+                normalized[str(key)] = float(value)
+            return normalized or None
+
+        return None
+
     def _create_pre_match_players_collection(
         self, r: dict, stats_col: str, day_number: int, team_id: str
     ) -> PreMatchPlayersCollection:
@@ -994,17 +1036,12 @@ class PlayerRatingGenerator(RatingGenerator):
                 else None
             )
 
-            team_playing_time = …
-            …
-            if cn.opponent_players_playing_time:
-                raw_value = team_player.get(cn.opponent_players_playing_time)
-                if raw_value is not None:
-                    opponent_playing_time = raw_value
+            team_playing_time = self._get_players_playing_time(
+                team_player, cn.team_players_playing_time
+            )
+            opponent_playing_time = self._get_players_playing_time(
+                team_player, cn.opponent_players_playing_time
+            )
 
             mp = MatchPerformance(
                 performance_value=perf_val,
@@ -1245,16 +1282,12 @@ class PlayerRatingGenerator(RatingGenerator):
                 ppw = pw
             proj_w.append(float(ppw))
 
-            team_playing_time = …
-            …
-            if cn.opponent_players_playing_time:
-                raw_value = tp.get(cn.opponent_players_playing_time)
-                if raw_value is not None:
-                    opponent_playing_time = raw_value
+            team_playing_time = self._get_players_playing_time(
+                tp, cn.team_players_playing_time
+            )
+            opponent_playing_time = self._get_players_playing_time(
+                tp, cn.opponent_players_playing_time
+            )
 
             mp = MatchPerformance(
                 performance_value=get_perf_value(tp),
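Since _normalize_players_playing_time is a self-contained staticmethod, its behavior can be sketched directly. The inputs below are illustrative only, not taken from the package's tests:

    from spforge.ratings import PlayerRatingGenerator

    # Mappings and JSON-encoded strings are both accepted; values are coerced
    # to float and None entries are dropped.
    PlayerRatingGenerator._normalize_players_playing_time({"P1": 30})                # {"P1": 30.0}
    PlayerRatingGenerator._normalize_players_playing_time('{"P1": 30, "P2": null}')  # {"P1": 30.0}
    PlayerRatingGenerator._normalize_players_playing_time("   ")                     # None (blank string)
    PlayerRatingGenerator._normalize_players_playing_time("not json")                # raises ValueError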
spforge/scorer/_score.py
CHANGED
@@ -263,6 +263,7 @@ class BaseScorer(ABC):
         validation_column: str | None,
         filters: list[Filter] | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
@@ -274,6 +275,7 @@ class BaseScorer(ABC):
            If set, the scorer will be calculated only once the values of the validation column are equal to 1
         :param filters: The filters to apply before calculating
         :param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
+        :param aggregation_method: Aggregation methods for pred/target when aggregation_level is set.
         :param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
         """
         self.target = target
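Based on _build_aggregation_expr further down in this diff, the dict is keyed by "pred" and "target" and accepts the strings "sum" (default), "mean", "first", or a tuple naming a weight column, e.g.:

    aggregation_method={"pred": ("weighted_mean", "<weight_col>"), "target": "sum"}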
@@ -289,28 +291,59 @@ class BaseScorer(ABC):
             )
         )
         self.aggregation_level = aggregation_level
+        self.aggregation_method = aggregation_method
         self.granularity = granularity
         self.compare_to_naive = compare_to_naive
         self.naive_granularity = naive_granularity
 
+    def _resolve_aggregation_method(self, key: str) -> Any:
+        if self.aggregation_method is None:
+            return "sum"
+        method = self.aggregation_method.get(key)
+        if method is None:
+            return "sum"
+        return method
+
+    def _build_aggregation_expr(self, df: IntoFrameT, col: str, method: Any) -> Any:
+        if isinstance(method, tuple):
+            if len(method) != 2 or method[0] != "weighted_mean":
+                raise ValueError(f"Unsupported aggregation method for {col}: {method}")
+            weight_col = method[1]
+            if weight_col not in df.columns:
+                raise ValueError(
+                    f"Aggregation weight column '{weight_col}' not found in dataframe columns."
+                )
+            weighted_sum = (nw.col(col) * nw.col(weight_col)).sum()
+            weight_total = nw.col(weight_col).sum()
+            return (weighted_sum / weight_total).alias(col)
+
+        if method == "sum":
+            return nw.col(col).sum().alias(col)
+        if method == "mean":
+            return nw.col(col).mean().alias(col)
+        if method == "first":
+            return nw.col(col).first().alias(col)
+        raise ValueError(f"Unsupported aggregation method for {col}: {method}")
+
     def _apply_aggregation_level(self, df: IntoFrameT) -> IntoFrameT:
         """Apply aggregation_level grouping if set"""
         if self.aggregation_level:
-            …
-            agg_exprs = [
-            …
-                    # Use sum for aggregation
-                    agg_exprs.append(nw.col(col).sum().alias(col))
-                except Exception:
-                    # Fallback to mean or first
-                    agg_exprs.append(nw.col(col).mean().alias(col))
-            …
+            pred_method = self._resolve_aggregation_method("pred")
+            target_method = self._resolve_aggregation_method("target")
+            agg_exprs = [
+                self._build_aggregation_expr(df, self.pred_column, pred_method),
+                self._build_aggregation_expr(df, self.target, target_method),
+            ]
             df = df.group_by(self.aggregation_level).agg(agg_exprs)
         return df
 
+    @narwhals.narwhalify
+    def aggregate(self, df: IntoFrameT) -> IntoFrameT:
+        df = apply_filters(df, self.filters)
+        if not hasattr(df, "to_native"):
+            df = nw.from_native(df)
+        return self._apply_aggregation_level(df)
+
     def _get_granularity_groups(self, df: IntoFrameT) -> list[tuple]:
         """Get list of granularity tuples from dataframe"""
         if not self.granularity:
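A usage sketch grounded in the new tests; the "minutes" weight column and the import path are assumptions made for illustration:

    import pandas as pd
    from spforge.scorer import MeanBiasScorer  # import path assumed

    df = pd.DataFrame(
        {
            "game_id": [1, 1, 1],
            "team_id": [1, 1, 1],
            "pred": [0.4, 0.6, 0.9],
            "target": [0.5, 0.7, 0.2],
            "minutes": [10, 20, 30],  # hypothetical weight column
        }
    )

    scorer = MeanBiasScorer(
        pred_column="pred",
        target="target",
        aggregation_level=["game_id", "team_id"],
        aggregation_method={
            "pred": ("weighted_mean", "minutes"),  # tuple form: weighted mean over a weight column
            "target": "sum",                       # string forms: "sum" (default), "mean", "first"
        },
    )
    grouped = scorer.aggregate(df)  # filters + group_by applied, metric not yet computed
    score = scorer.score(df)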
@@ -345,6 +378,7 @@ class PWMSE(BaseScorer):
         target: str,
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list[Filter] | None = None,
         labels: list[int] | None = None,
@@ -357,6 +391,7 @@ class PWMSE(BaseScorer):
             target=target,
             pred_column=pred_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -454,12 +489,7 @@ class PWMSE(BaseScorer):
 
             pass
         else:
-            df = …
-                [
-                    nw.col(self.pred_column).mean().alias(self.pred_column),
-                    nw.col(self.target).mean().alias(self.target),
-                ]
-            )
+            df = self._apply_aggregation_level(df)
 
         if self.granularity:
             results = {}
@@ -517,6 +547,7 @@ class MeanBiasScorer(BaseScorer):
         target: str,
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list[Filter] | None = None,
         labels: list[int] | None = None,
@@ -540,6 +571,7 @@ class MeanBiasScorer(BaseScorer):
             target=target,
             pred_column=pred_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -582,12 +614,7 @@ class MeanBiasScorer(BaseScorer):
 
         # Apply aggregation_level if set
         if self.aggregation_level:
-            df = …
-                [
-                    nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
-                    nw.col(self.target).sum().alias(self.target),
-                ]
-            )
+            df = self._apply_aggregation_level(df)
         # After group_by, ensure df is still a Narwhals DataFrame
         if not hasattr(df, "to_native"):
             df = nw.from_native(df)
@@ -658,6 +685,7 @@ class SklearnScorer(BaseScorer):
         target: str,
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list[Filter] | None = None,
         params: dict[str, Any] = None,
@@ -679,6 +707,7 @@ class SklearnScorer(BaseScorer):
             target=target,
             pred_column=pred_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -756,12 +785,7 @@ class SklearnScorer(BaseScorer):
         )
 
         if self.aggregation_level:
-            df = …
-                [
-                    nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
-                    nw.col(self.target).sum().alias(self.target),
-                ]
-            )
+            df = self._apply_aggregation_level(df)
         if not hasattr(df, "to_native"):
             df = nw.from_native(df)
 
@@ -798,6 +822,7 @@ class ProbabilisticMeanBias(BaseScorer):
         class_column_name: str = "classes",
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list[Filter] | None = None,
         compare_to_naive: bool = False,
@@ -810,6 +835,7 @@ class ProbabilisticMeanBias(BaseScorer):
             target=target,
             pred_column=pred_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -817,6 +843,49 @@ class ProbabilisticMeanBias(BaseScorer):
             naive_granularity=naive_granularity,
         )
 
+    def _aggregate_pandas_series(
+        self, df: pd.DataFrame, col: str, method: Any
+    ) -> pd.Series:
+        grouped = df.groupby(self.aggregation_level, dropna=False)
+        if isinstance(method, tuple):
+            if len(method) != 2 or method[0] != "weighted_mean":
+                raise ValueError(f"Unsupported aggregation method for {col}: {method}")
+            weight_col = method[1]
+            if weight_col not in df.columns:
+                raise ValueError(
+                    f"Aggregation weight column '{weight_col}' not found in dataframe columns."
+                )
+            return grouped.apply(
+                lambda g: (g[col] * g[weight_col]).sum() / g[weight_col].sum()
+            )
+
+        if method == "sum":
+            return grouped[col].sum()
+        if method == "mean":
+            return grouped[col].mean()
+        if method == "first":
+            return grouped[col].first()
+        raise ValueError(f"Unsupported aggregation method for {col}: {method}")
+
+    def _aggregate_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
+        if not self.aggregation_level:
+            return df
+        pred_method = self._resolve_aggregation_method("pred")
+        target_method = self._resolve_aggregation_method("target")
+        agg_df = pd.DataFrame(
+            {
+                self.pred_column: self._aggregate_pandas_series(
+                    df, self.pred_column, pred_method
+                ),
+                self.target: self._aggregate_pandas_series(df, self.target, target_method),
+                self.class_column_name: df.groupby(self.aggregation_level, dropna=False)[
+                    self.class_column_name
+                ].first(),
+            }
+        )
+        agg_df.reset_index(inplace=True)
+        return agg_df
+
     def _calculate_score_for_group(self, df: pd.DataFrame) -> float:
         """Calculate score for a single group (used for granularity)"""
         df = df.copy()
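The pandas path mirrors the narwhals expressions above. As a standalone illustration of the weighted-mean branch (toy data, not from the package):

    import pandas as pd

    df = pd.DataFrame({"game_id": [1, 1], "pred": [0.4, 0.8], "minutes": [10, 30]})

    # Same computation as the lambda inside _aggregate_pandas_series:
    weighted = df.groupby("game_id").apply(
        lambda g: (g["pred"] * g["minutes"]).sum() / g["minutes"].sum()
    )
    print(weighted)  # game_id 1 -> (0.4*10 + 0.8*30) / 40 = 0.7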
@@ -948,13 +1017,7 @@ class ProbabilisticMeanBias(BaseScorer):
 
         # Apply aggregation_level if set
         if self.aggregation_level:
-            df = (
-                df.groupby(self.aggregation_level)
-                .agg(
-                    {self.pred_column: "mean", self.target: "mean", self.class_column_name: "first"}
-                )
-                .reset_index()
-            )
+            df = self._aggregate_pandas(df)
 
         # If granularity is set, calculate separate scores per group
         if self.granularity:
@@ -995,6 +1058,7 @@ class OrdinalLossScorer(BaseScorer):
         classes: list[int],
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list[Filter] | None = None,
         labels: list[int] | None = None,
@@ -1006,6 +1070,7 @@ class OrdinalLossScorer(BaseScorer):
             target=target,
             pred_column=pred_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -1102,14 +1167,10 @@ class OrdinalLossScorer(BaseScorer):
         if not hasattr(df, "to_native"):
             df = nw.from_native(df)
 
-        df_native = df.to_native()
-        df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
-
         # Filter out null and NaN targets
-        before = len(…
-        …
-        after = len(df_pl)
+        before = len(df)
+        df = _filter_nulls_and_nans(df, self.target)
+        after = len(df)
         if before != after:
             _logger.info(
                 "OrdinalLossScorer: Dropped %d rows with NaN target (%d → %d)",
@@ -1119,12 +1180,12 @@ class OrdinalLossScorer(BaseScorer):
             )
 
         if self.aggregation_level:
-            …
+            df = self._apply_aggregation_level(df)
+
+        df_native = df.to_native()
+        df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
+        if df_pl.is_empty():
+            return {} if self.granularity else 0.0
 
         if self.granularity:
             results = {}
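The diff calls a _filter_nulls_and_nans helper that is not shown here; presumably it drops rows whose target is null or NaN on the narwhals frame. A hypothetical sketch only, not the package's actual implementation:

    import narwhals.stable.v2 as nw

    def _filter_nulls_and_nans(df, column: str):
        # Hypothetical: keep rows where `column` is neither null nor NaN.
        return df.filter(~nw.col(column).is_null() & ~nw.col(column).is_nan())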
@@ -1197,6 +1258,7 @@ class ThresholdEventScorer(BaseScorer):
         threshold_rounding: str = "ceil",
         validation_column: str | None = None,
         aggregation_level: list[str] | None = None,
+        aggregation_method: dict[str, Any] | None = None,
         granularity: list[str] | None = None,
         filters: list["Filter"] | None = None,
         compare_to_naive: bool = False,
@@ -1207,6 +1269,7 @@ class ThresholdEventScorer(BaseScorer):
             target=self._EVENT_COL,
             pred_column=dist_column,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=filters,
             validation_column=validation_column,
@@ -1227,6 +1290,7 @@ class ThresholdEventScorer(BaseScorer):
             target=self._EVENT_COL,
             pred_column=self._P_EVENT_COL,
             aggregation_level=aggregation_level,
+            aggregation_method=aggregation_method,
             granularity=granularity,
             filters=None,
             validation_column=validation_column,
{spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.25
+Version: 0.8.27
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
{spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/RECORD
CHANGED

@@ -51,7 +51,7 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
 spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
 spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
 spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
-spforge/ratings/_player_rating.py,sha256=…
+spforge/ratings/_player_rating.py,sha256=TDw0LM-sLn27fprUhOW5csaDqAhzagoVm8SPKipZZmg,61106
 spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
 spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
 spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
@@ -62,7 +62,7 @@ spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH
 spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
 spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
-spforge/scorer/_score.py,sha256=…
+spforge/scorer/_score.py,sha256=jZPaKiSpb14Lzec4HWrK3CfpkqPaN33mriHUZosRozM,60566
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
 spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
 spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.25.dist-info/licenses/LICENSE,sha256=…
+spforge-0.8.27.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -94,20 +94,20 @@ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGF
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
 tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
 tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
-tests/ratings/test_player_rating_generator.py,sha256=…
+tests/ratings/test_player_rating_generator.py,sha256=tpU83Orw1nlus29a0s9vc1pghL-f2rs642viW_6wFgk,83633
 tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
 tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
 tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
-tests/scorer/test_score_aggregation_granularity.py,sha256=…
+tests/scorer/test_score_aggregation_granularity.py,sha256=O5TRlG9UE4NBpF0tL_ywZKDmkMIorwrxgTegQ75Tr7A,15871
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
 tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.25.dist-info/METADATA,sha256=…
-spforge-0.8.25.dist-info/WHEEL,sha256=…
-spforge-0.8.25.dist-info/top_level.txt,sha256=…
-spforge-0.8.25.dist-info/RECORD,,
+spforge-0.8.27.dist-info/METADATA,sha256=Bl0sOhG9rDGmQpTThxwNPlAKyXeR6dCxWlGmbHH0LN0,20048
+spforge-0.8.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.27.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.27.dist-info/RECORD,,
tests/ratings/test_player_rating_generator.py
CHANGED

@@ -4,7 +4,7 @@ import polars as pl
 import pytest
 
 from spforge import ColumnNames
-from spforge.data_structures import RatingState
+from spforge.data_structures import PlayerRating, RatingState
 from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures, RatingUnknownFeatures
 
 
@@ -78,6 +78,39 @@ def test_fit_transform_updates_internal_state(base_cn, sample_df):
     assert "P1" in gen._player_off_ratings
     assert "P1" in gen._player_def_ratings
 
+
+def test_pre_match_collection_parses_playing_time_json(base_cn):
+    """JSON strings in the team/opponent playing time columns should become dicts."""
+    from dataclasses import replace
+
+    cn = replace(
+        base_cn,
+        team_players_playing_time="team_pt",
+        opponent_players_playing_time="opp_pt",
+    )
+    gen = PlayerRatingGenerator(performance_column="perf", column_names=cn)
+    gen._player_off_ratings["P1"] = PlayerRating(id="P1", rating_value=100.0)
+    gen._player_def_ratings["P1"] = PlayerRating(id="P1", rating_value=100.0)
+
+    stats_entry = {
+        cn.player_id: "P1",
+        "perf": 0.75,
+        cn.participation_weight: 1.0,
+        cn.team_players_playing_time: '{"P1": 30}',
+        cn.opponent_players_playing_time: '{"P3": 25}',
+    }
+
+    collection = gen._create_pre_match_players_collection(
+        r={"__PLAYER_STATS": [stats_entry]},
+        stats_col="__PLAYER_STATS",
+        day_number=1,
+        team_id="T1",
+    )
+
+    match_perf = collection.pre_match_player_ratings[0].match_performance
+    assert match_perf.team_players_playing_time == {"P1": 30.0}
+    assert match_perf.opponent_players_playing_time == {"P3": 25.0}
+
     assert gen._player_off_ratings["P1"].rating_value > 0
 
 
tests/scorer/test_score_aggregation_granularity.py
CHANGED

@@ -21,6 +21,16 @@ def create_dataframe(df_type, data: dict):
     return df_type(data)
 
 
+def to_pandas_df(df):
+    if hasattr(df, "to_native"):
+        df = df.to_native()
+    if isinstance(df, pd.DataFrame):
+        return df
+    if isinstance(df, pl.DataFrame):
+        return df.to_pandas()
+    raise TypeError(f"Unsupported dataframe type: {type(df)}")
+
+
 # ============================================================================
 # Aggregation Level Tests
 # ============================================================================
@@ -104,6 +114,65 @@ def test_pwmse_aggregation_level(df_type):
     assert score >= 0
 
 
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_aggregate_returns_grouped_frame(df_type):
+    """aggregate returns filtered, grouped dataframe with default sum behavior"""
+    df = create_dataframe(
+        df_type,
+        {
+            "game_id": [1, 1, 1, 1],
+            "player_id": [1, 2, 3, 4],
+            "team_id": [1, 1, 2, 2],
+            "pred": [0.5, 0.6, 0.3, 0.4],
+            "target": [0, 1, 0, 1],
+        },
+    )
+
+    scorer = MeanBiasScorer(
+        pred_column="pred", target="target", aggregation_level=["game_id", "team_id"]
+    )
+    aggregated = to_pandas_df(scorer.aggregate(df))
+
+    assert len(aggregated) == 2
+    team1 = aggregated[aggregated["team_id"] == 1].iloc[0]
+    team2 = aggregated[aggregated["team_id"] == 2].iloc[0]
+    assert abs(team1["pred"] - 1.1) < 1e-10
+    assert abs(team1["target"] - 1.0) < 1e-10
+    assert abs(team2["pred"] - 0.7) < 1e-10
+    assert abs(team2["target"] - 1.0) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_weighted_mean_aggregation_method(df_type):
+    """Weighted mean aggregation uses provided weight column"""
+    df = create_dataframe(
+        df_type,
+        {
+            "game_id": [1, 1, 1],
+            "team_id": [1, 1, 1],
+            "pred": [0.4, 0.6, 0.9],
+            "target": [0.5, 0.7, 0.2],
+            "attempts": [10, 20, 30],
+        },
+    )
+
+    scorer = MeanBiasScorer(
+        pred_column="pred",
+        target="target",
+        aggregation_level=["game_id", "team_id"],
+        aggregation_method={
+            "pred": ("weighted_mean", "attempts"),
+            "target": ("weighted_mean", "attempts"),
+        },
+    )
+
+    score = scorer.score(df)
+    expected_pred = (0.4 * 10 + 0.6 * 20 + 0.9 * 30) / 60
+    expected_target = (0.5 * 10 + 0.7 * 20 + 0.2 * 30) / 60
+    expected = expected_pred - expected_target
+    assert abs(score - expected) < 1e-10
+
+
 # ============================================================================
 # Granularity Tests (Separate Scores Per Group)
 # ============================================================================
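For the weighted-mean test, the expected value works out as follows (plain arithmetic restating the test's own formula):

    expected_pred   = (0.4*10 + 0.6*20 + 0.9*30) / 60 = 43/60 ≈ 0.7167
    expected_target = (0.5*10 + 0.7*20 + 0.2*30) / 60 = 25/60 ≈ 0.4167
    expected        = expected_pred - expected_target = 18/60 = 0.3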
File without changes: {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/WHEEL
File without changes: {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/licenses/LICENSE
File without changes: {spforge-0.8.25.dist-info → spforge-0.8.27.dist-info}/top_level.txt