spforge 0.8.25__py3-none-any.whl → 0.8.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

@@ -2,8 +2,10 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import copy
5
- import math
5
+ import json
6
6
  import logging
7
+ import math
8
+ from collections.abc import Mapping
7
9
  from typing import Any, Literal
8
10
 
9
11
  import narwhals.stable.v2 as nw
@@ -912,10 +914,16 @@ class PlayerRatingGenerator(RatingGenerator):
912
914
  if cn.league and cn.league in df.columns:
913
915
  player_stat_cols.append(cn.league)
914
916
 
915
- if cn.team_players_playing_time and cn.team_players_playing_time in df.columns:
917
+ if (
918
+ cn.team_players_playing_time
919
+ and cn.team_players_playing_time in df.columns
920
+ ):
916
921
  player_stat_cols.append(cn.team_players_playing_time)
917
922
 
918
- if cn.opponent_players_playing_time and cn.opponent_players_playing_time in df.columns:
923
+ if (
924
+ cn.opponent_players_playing_time
925
+ and cn.opponent_players_playing_time in df.columns
926
+ ):
919
927
  player_stat_cols.append(cn.opponent_players_playing_time)
920
928
 
921
929
  df = df.with_columns(pl.struct(player_stat_cols).alias(PLAYER_STATS))
@@ -948,6 +956,40 @@ class PlayerRatingGenerator(RatingGenerator):
948
956
  match_df = self._add_day_number(match_df, cn.start_date, "__day_number")
949
957
  return match_df
950
958
 
959
+ def _get_players_playing_time(
960
+ self, source: Mapping[str, Any], column_name: str | None
961
+ ) -> dict[str, float] | None:
962
+ if not column_name:
963
+ return None
964
+ return self._normalize_players_playing_time(source.get(column_name))
965
+
966
+ @staticmethod
967
+ def _normalize_players_playing_time(raw_value: Any) -> dict[str, float] | None:
968
+ if raw_value is None:
969
+ return None
970
+
971
+ if isinstance(raw_value, str):
972
+ raw_text = raw_value
973
+ raw_value = raw_value.strip()
974
+ if not raw_value:
975
+ return None
976
+ try:
977
+ raw_value = json.loads(raw_value)
978
+ except json.JSONDecodeError as exc:
979
+ raise ValueError(
980
+ f"unable to parse playing time JSON {raw_text!r}: {exc}"
981
+ ) from exc
982
+
983
+ if isinstance(raw_value, Mapping):
984
+ normalized: dict[str, float] = {}
985
+ for key, value in raw_value.items():
986
+ if value is None:
987
+ continue
988
+ normalized[str(key)] = float(value)
989
+ return normalized or None
990
+
991
+ return None
992
+
951
993
  def _create_pre_match_players_collection(
952
994
  self, r: dict, stats_col: str, day_number: int, team_id: str
953
995
  ) -> PreMatchPlayersCollection:
@@ -994,17 +1036,12 @@ class PlayerRatingGenerator(RatingGenerator):
994
1036
  else None
995
1037
  )
996
1038
 
997
- team_playing_time = None
998
- opponent_playing_time = None
999
- if cn.team_players_playing_time:
1000
- raw_value = team_player.get(cn.team_players_playing_time)
1001
- if raw_value is not None:
1002
- team_playing_time = raw_value
1003
-
1004
- if cn.opponent_players_playing_time:
1005
- raw_value = team_player.get(cn.opponent_players_playing_time)
1006
- if raw_value is not None:
1007
- opponent_playing_time = raw_value
1039
+ team_playing_time = self._get_players_playing_time(
1040
+ team_player, cn.team_players_playing_time
1041
+ )
1042
+ opponent_playing_time = self._get_players_playing_time(
1043
+ team_player, cn.opponent_players_playing_time
1044
+ )
1008
1045
 
1009
1046
  mp = MatchPerformance(
1010
1047
  performance_value=perf_val,
@@ -1245,16 +1282,12 @@ class PlayerRatingGenerator(RatingGenerator):
1245
1282
  ppw = pw
1246
1283
  proj_w.append(float(ppw))
1247
1284
 
1248
- team_playing_time = None
1249
- opponent_playing_time = None
1250
- if cn.team_players_playing_time:
1251
- raw_value = tp.get(cn.team_players_playing_time)
1252
- if raw_value is not None:
1253
- team_playing_time = raw_value
1254
- if cn.opponent_players_playing_time:
1255
- raw_value = tp.get(cn.opponent_players_playing_time)
1256
- if raw_value is not None:
1257
- opponent_playing_time = raw_value
1285
+ team_playing_time = self._get_players_playing_time(
1286
+ tp, cn.team_players_playing_time
1287
+ )
1288
+ opponent_playing_time = self._get_players_playing_time(
1289
+ tp, cn.opponent_players_playing_time
1290
+ )
1258
1291
 
1259
1292
  mp = MatchPerformance(
1260
1293
  performance_value=get_perf_value(tp),
spforge/scorer/_score.py CHANGED
@@ -263,6 +263,7 @@ class BaseScorer(ABC):
263
263
  validation_column: str | None,
264
264
  filters: list[Filter] | None = None,
265
265
  aggregation_level: list[str] | None = None,
266
+ aggregation_method: dict[str, Any] | None = None,
266
267
  granularity: list[str] | None = None,
267
268
  compare_to_naive: bool = False,
268
269
  naive_granularity: list[str] | None = None,
@@ -274,6 +275,7 @@ class BaseScorer(ABC):
274
275
  If set, the scorer will be calculated only once the values of the validation column are equal to 1
275
276
  :param filters: The filters to apply before calculating
276
277
  :param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
278
+ :param aggregation_method: Aggregation methods for pred/target when aggregation_level is set.
277
279
  :param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
278
280
  """
279
281
  self.target = target
@@ -289,28 +291,59 @@ class BaseScorer(ABC):
289
291
  )
290
292
  )
291
293
  self.aggregation_level = aggregation_level
294
+ self.aggregation_method = aggregation_method
292
295
  self.granularity = granularity
293
296
  self.compare_to_naive = compare_to_naive
294
297
  self.naive_granularity = naive_granularity
295
298
 
299
+ def _resolve_aggregation_method(self, key: str) -> Any:
300
+ if self.aggregation_method is None:
301
+ return "sum"
302
+ method = self.aggregation_method.get(key)
303
+ if method is None:
304
+ return "sum"
305
+ return method
306
+
307
+ def _build_aggregation_expr(self, df: IntoFrameT, col: str, method: Any) -> Any:
308
+ if isinstance(method, tuple):
309
+ if len(method) != 2 or method[0] != "weighted_mean":
310
+ raise ValueError(f"Unsupported aggregation method for {col}: {method}")
311
+ weight_col = method[1]
312
+ if weight_col not in df.columns:
313
+ raise ValueError(
314
+ f"Aggregation weight column '{weight_col}' not found in dataframe columns."
315
+ )
316
+ weighted_sum = (nw.col(col) * nw.col(weight_col)).sum()
317
+ weight_total = nw.col(weight_col).sum()
318
+ return (weighted_sum / weight_total).alias(col)
319
+
320
+ if method == "sum":
321
+ return nw.col(col).sum().alias(col)
322
+ if method == "mean":
323
+ return nw.col(col).mean().alias(col)
324
+ if method == "first":
325
+ return nw.col(col).first().alias(col)
326
+ raise ValueError(f"Unsupported aggregation method for {col}: {method}")
327
+
296
328
  def _apply_aggregation_level(self, df: IntoFrameT) -> IntoFrameT:
297
329
  """Apply aggregation_level grouping if set"""
298
330
  if self.aggregation_level:
299
- # Determine aggregation method based on column types
300
- # For numeric columns, use sum; for others, use first or mean
301
- agg_exprs = []
302
- for col in [self.pred_column, self.target]:
303
- # Try to determine if numeric
304
- try:
305
- # Use sum for aggregation
306
- agg_exprs.append(nw.col(col).sum().alias(col))
307
- except Exception:
308
- # Fallback to mean or first
309
- agg_exprs.append(nw.col(col).mean().alias(col))
310
-
331
+ pred_method = self._resolve_aggregation_method("pred")
332
+ target_method = self._resolve_aggregation_method("target")
333
+ agg_exprs = [
334
+ self._build_aggregation_expr(df, self.pred_column, pred_method),
335
+ self._build_aggregation_expr(df, self.target, target_method),
336
+ ]
311
337
  df = df.group_by(self.aggregation_level).agg(agg_exprs)
312
338
  return df
313
339
 
340
+ @narwhals.narwhalify
341
+ def aggregate(self, df: IntoFrameT) -> IntoFrameT:
342
+ df = apply_filters(df, self.filters)
343
+ if not hasattr(df, "to_native"):
344
+ df = nw.from_native(df)
345
+ return self._apply_aggregation_level(df)
346
+
314
347
  def _get_granularity_groups(self, df: IntoFrameT) -> list[tuple]:
315
348
  """Get list of granularity tuples from dataframe"""
316
349
  if not self.granularity:
@@ -345,6 +378,7 @@ class PWMSE(BaseScorer):
345
378
  target: str,
346
379
  validation_column: str | None = None,
347
380
  aggregation_level: list[str] | None = None,
381
+ aggregation_method: dict[str, Any] | None = None,
348
382
  granularity: list[str] | None = None,
349
383
  filters: list[Filter] | None = None,
350
384
  labels: list[int] | None = None,
@@ -357,6 +391,7 @@ class PWMSE(BaseScorer):
357
391
  target=target,
358
392
  pred_column=pred_column,
359
393
  aggregation_level=aggregation_level,
394
+ aggregation_method=aggregation_method,
360
395
  granularity=granularity,
361
396
  filters=filters,
362
397
  validation_column=validation_column,
@@ -454,12 +489,7 @@ class PWMSE(BaseScorer):
454
489
 
455
490
  pass
456
491
  else:
457
- df = df.group_by(self.aggregation_level).agg(
458
- [
459
- nw.col(self.pred_column).mean().alias(self.pred_column),
460
- nw.col(self.target).mean().alias(self.target),
461
- ]
462
- )
492
+ df = self._apply_aggregation_level(df)
463
493
 
464
494
  if self.granularity:
465
495
  results = {}
@@ -517,6 +547,7 @@ class MeanBiasScorer(BaseScorer):
517
547
  target: str,
518
548
  validation_column: str | None = None,
519
549
  aggregation_level: list[str] | None = None,
550
+ aggregation_method: dict[str, Any] | None = None,
520
551
  granularity: list[str] | None = None,
521
552
  filters: list[Filter] | None = None,
522
553
  labels: list[int] | None = None,
@@ -540,6 +571,7 @@ class MeanBiasScorer(BaseScorer):
540
571
  target=target,
541
572
  pred_column=pred_column,
542
573
  aggregation_level=aggregation_level,
574
+ aggregation_method=aggregation_method,
543
575
  granularity=granularity,
544
576
  filters=filters,
545
577
  validation_column=validation_column,
@@ -582,12 +614,7 @@ class MeanBiasScorer(BaseScorer):
582
614
 
583
615
  # Apply aggregation_level if set
584
616
  if self.aggregation_level:
585
- df = df.group_by(self.aggregation_level).agg(
586
- [
587
- nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
588
- nw.col(self.target).sum().alias(self.target),
589
- ]
590
- )
617
+ df = self._apply_aggregation_level(df)
591
618
  # After group_by, ensure df is still a Narwhals DataFrame
592
619
  if not hasattr(df, "to_native"):
593
620
  df = nw.from_native(df)
@@ -658,6 +685,7 @@ class SklearnScorer(BaseScorer):
658
685
  target: str,
659
686
  validation_column: str | None = None,
660
687
  aggregation_level: list[str] | None = None,
688
+ aggregation_method: dict[str, Any] | None = None,
661
689
  granularity: list[str] | None = None,
662
690
  filters: list[Filter] | None = None,
663
691
  params: dict[str, Any] = None,
@@ -679,6 +707,7 @@ class SklearnScorer(BaseScorer):
679
707
  target=target,
680
708
  pred_column=pred_column,
681
709
  aggregation_level=aggregation_level,
710
+ aggregation_method=aggregation_method,
682
711
  granularity=granularity,
683
712
  filters=filters,
684
713
  validation_column=validation_column,
@@ -756,12 +785,7 @@ class SklearnScorer(BaseScorer):
756
785
  )
757
786
 
758
787
  if self.aggregation_level:
759
- df = df.group_by(self.aggregation_level).agg(
760
- [
761
- nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
762
- nw.col(self.target).sum().alias(self.target),
763
- ]
764
- )
788
+ df = self._apply_aggregation_level(df)
765
789
  if not hasattr(df, "to_native"):
766
790
  df = nw.from_native(df)
767
791
 
@@ -798,6 +822,7 @@ class ProbabilisticMeanBias(BaseScorer):
798
822
  class_column_name: str = "classes",
799
823
  validation_column: str | None = None,
800
824
  aggregation_level: list[str] | None = None,
825
+ aggregation_method: dict[str, Any] | None = None,
801
826
  granularity: list[str] | None = None,
802
827
  filters: list[Filter] | None = None,
803
828
  compare_to_naive: bool = False,
@@ -810,6 +835,7 @@ class ProbabilisticMeanBias(BaseScorer):
810
835
  target=target,
811
836
  pred_column=pred_column,
812
837
  aggregation_level=aggregation_level,
838
+ aggregation_method=aggregation_method,
813
839
  granularity=granularity,
814
840
  filters=filters,
815
841
  validation_column=validation_column,
@@ -817,6 +843,49 @@ class ProbabilisticMeanBias(BaseScorer):
817
843
  naive_granularity=naive_granularity,
818
844
  )
819
845
 
846
+ def _aggregate_pandas_series(
847
+ self, df: pd.DataFrame, col: str, method: Any
848
+ ) -> pd.Series:
849
+ grouped = df.groupby(self.aggregation_level, dropna=False)
850
+ if isinstance(method, tuple):
851
+ if len(method) != 2 or method[0] != "weighted_mean":
852
+ raise ValueError(f"Unsupported aggregation method for {col}: {method}")
853
+ weight_col = method[1]
854
+ if weight_col not in df.columns:
855
+ raise ValueError(
856
+ f"Aggregation weight column '{weight_col}' not found in dataframe columns."
857
+ )
858
+ return grouped.apply(
859
+ lambda g: (g[col] * g[weight_col]).sum() / g[weight_col].sum()
860
+ )
861
+
862
+ if method == "sum":
863
+ return grouped[col].sum()
864
+ if method == "mean":
865
+ return grouped[col].mean()
866
+ if method == "first":
867
+ return grouped[col].first()
868
+ raise ValueError(f"Unsupported aggregation method for {col}: {method}")
869
+
870
+ def _aggregate_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
871
+ if not self.aggregation_level:
872
+ return df
873
+ pred_method = self._resolve_aggregation_method("pred")
874
+ target_method = self._resolve_aggregation_method("target")
875
+ agg_df = pd.DataFrame(
876
+ {
877
+ self.pred_column: self._aggregate_pandas_series(
878
+ df, self.pred_column, pred_method
879
+ ),
880
+ self.target: self._aggregate_pandas_series(df, self.target, target_method),
881
+ self.class_column_name: df.groupby(self.aggregation_level, dropna=False)[
882
+ self.class_column_name
883
+ ].first(),
884
+ }
885
+ )
886
+ agg_df.reset_index(inplace=True)
887
+ return agg_df
888
+
820
889
  def _calculate_score_for_group(self, df: pd.DataFrame) -> float:
821
890
  """Calculate score for a single group (used for granularity)"""
822
891
  df = df.copy()
@@ -948,13 +1017,7 @@ class ProbabilisticMeanBias(BaseScorer):
948
1017
 
949
1018
  # Apply aggregation_level if set
950
1019
  if self.aggregation_level:
951
- df = (
952
- df.groupby(self.aggregation_level)
953
- .agg(
954
- {self.pred_column: "mean", self.target: "mean", self.class_column_name: "first"}
955
- )
956
- .reset_index()
957
- )
1020
+ df = self._aggregate_pandas(df)
958
1021
 
959
1022
  # If granularity is set, calculate separate scores per group
960
1023
  if self.granularity:
@@ -995,6 +1058,7 @@ class OrdinalLossScorer(BaseScorer):
995
1058
  classes: list[int],
996
1059
  validation_column: str | None = None,
997
1060
  aggregation_level: list[str] | None = None,
1061
+ aggregation_method: dict[str, Any] | None = None,
998
1062
  granularity: list[str] | None = None,
999
1063
  filters: list[Filter] | None = None,
1000
1064
  labels: list[int] | None = None,
@@ -1006,6 +1070,7 @@ class OrdinalLossScorer(BaseScorer):
1006
1070
  target=target,
1007
1071
  pred_column=pred_column,
1008
1072
  aggregation_level=aggregation_level,
1073
+ aggregation_method=aggregation_method,
1009
1074
  granularity=granularity,
1010
1075
  filters=filters,
1011
1076
  validation_column=validation_column,
@@ -1102,14 +1167,10 @@ class OrdinalLossScorer(BaseScorer):
1102
1167
  if not hasattr(df, "to_native"):
1103
1168
  df = nw.from_native(df)
1104
1169
 
1105
- df_native = df.to_native()
1106
- df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
1107
-
1108
1170
  # Filter out null and NaN targets
1109
- before = len(df_pl)
1110
- target_col = pl.col(self.target)
1111
- df_pl = df_pl.filter(target_col.is_not_null() & target_col.is_not_nan())
1112
- after = len(df_pl)
1171
+ before = len(df)
1172
+ df = _filter_nulls_and_nans(df, self.target)
1173
+ after = len(df)
1113
1174
  if before != after:
1114
1175
  _logger.info(
1115
1176
  "OrdinalLossScorer: Dropped %d rows with NaN target (%d → %d)",
@@ -1119,12 +1180,12 @@ class OrdinalLossScorer(BaseScorer):
1119
1180
  )
1120
1181
 
1121
1182
  if self.aggregation_level:
1122
- df_pl = df_pl.group_by(self.aggregation_level).agg(
1123
- [
1124
- pl.col(self.pred_column).mean().alias(self.pred_column),
1125
- pl.col(self.target).mean().alias(self.target),
1126
- ]
1127
- )
1183
+ df = self._apply_aggregation_level(df)
1184
+
1185
+ df_native = df.to_native()
1186
+ df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
1187
+ if df_pl.is_empty():
1188
+ return {} if self.granularity else 0.0
1128
1189
 
1129
1190
  if self.granularity:
1130
1191
  results = {}
@@ -1197,6 +1258,7 @@ class ThresholdEventScorer(BaseScorer):
1197
1258
  threshold_rounding: str = "ceil",
1198
1259
  validation_column: str | None = None,
1199
1260
  aggregation_level: list[str] | None = None,
1261
+ aggregation_method: dict[str, Any] | None = None,
1200
1262
  granularity: list[str] | None = None,
1201
1263
  filters: list["Filter"] | None = None,
1202
1264
  compare_to_naive: bool = False,
@@ -1207,6 +1269,7 @@ class ThresholdEventScorer(BaseScorer):
1207
1269
  target=self._EVENT_COL,
1208
1270
  pred_column=dist_column,
1209
1271
  aggregation_level=aggregation_level,
1272
+ aggregation_method=aggregation_method,
1210
1273
  granularity=granularity,
1211
1274
  filters=filters,
1212
1275
  validation_column=validation_column,
@@ -1227,6 +1290,7 @@ class ThresholdEventScorer(BaseScorer):
1227
1290
  target=self._EVENT_COL,
1228
1291
  pred_column=self._P_EVENT_COL,
1229
1292
  aggregation_level=aggregation_level,
1293
+ aggregation_method=aggregation_method,
1230
1294
  granularity=granularity,
1231
1295
  filters=None,
1232
1296
  validation_column=validation_column,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.25
3
+ Version: 0.8.27
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -51,7 +51,7 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
51
51
  spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
52
52
  spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
54
- spforge/ratings/_player_rating.py,sha256=zltf4utwzKQxkTA8DAPZ4LWRDlwGxoiKFaiPIo4sdNw,60323
54
+ spforge/ratings/_player_rating.py,sha256=TDw0LM-sLn27fprUhOW5csaDqAhzagoVm8SPKipZZmg,61106
55
55
  spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
56
56
  spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
57
57
  spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
@@ -62,7 +62,7 @@ spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH
62
62
  spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
63
63
  spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
64
64
  spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
65
- spforge/scorer/_score.py,sha256=kNuqiK3F5mUEAVD7KjWYY7E_AkRrspR362QBm_jyElg,57623
65
+ spforge/scorer/_score.py,sha256=jZPaKiSpb14Lzec4HWrK3CfpkqPaN33mriHUZosRozM,60566
66
66
  spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
67
67
  spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
68
68
  spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.25.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.27.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
75
  tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -94,20 +94,20 @@ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGF
94
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
95
95
  tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
96
96
  tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
97
- tests/ratings/test_player_rating_generator.py,sha256=Z66LN1-YdUHrS6dszWZf4HeENRyH8oEtu4Nlsh1MpMI,82442
97
+ tests/ratings/test_player_rating_generator.py,sha256=tpU83Orw1nlus29a0s9vc1pghL-f2rs642viW_6wFgk,83633
98
98
  tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
99
99
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
100
100
  tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
101
101
  tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
102
102
  tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
103
- tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
103
+ tests/scorer/test_score_aggregation_granularity.py,sha256=O5TRlG9UE4NBpF0tL_ywZKDmkMIorwrxgTegQ75Tr7A,15871
104
104
  tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
105
105
  tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
106
106
  tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
108
108
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
109
109
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
110
- spforge-0.8.25.dist-info/METADATA,sha256=JwBRy1-fD-a4UzeS_DeCv9AoXfbvbI7DghMls363RQ4,20048
111
- spforge-0.8.25.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
- spforge-0.8.25.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
- spforge-0.8.25.dist-info/RECORD,,
110
+ spforge-0.8.27.dist-info/METADATA,sha256=Bl0sOhG9rDGmQpTThxwNPlAKyXeR6dCxWlGmbHH0LN0,20048
111
+ spforge-0.8.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
+ spforge-0.8.27.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
+ spforge-0.8.27.dist-info/RECORD,,
@@ -4,7 +4,7 @@ import polars as pl
4
4
  import pytest
5
5
 
6
6
  from spforge import ColumnNames
7
- from spforge.data_structures import RatingState
7
+ from spforge.data_structures import PlayerRating, RatingState
8
8
  from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures, RatingUnknownFeatures
9
9
 
10
10
 
@@ -78,6 +78,39 @@ def test_fit_transform_updates_internal_state(base_cn, sample_df):
78
78
  assert "P1" in gen._player_off_ratings
79
79
  assert "P1" in gen._player_def_ratings
80
80
 
81
+
82
+ def test_pre_match_collection_parses_playing_time_json(base_cn):
83
+ """JSON strings in the team/opponent playing time columns should become dicts."""
84
+ from dataclasses import replace
85
+
86
+ cn = replace(
87
+ base_cn,
88
+ team_players_playing_time="team_pt",
89
+ opponent_players_playing_time="opp_pt",
90
+ )
91
+ gen = PlayerRatingGenerator(performance_column="perf", column_names=cn)
92
+ gen._player_off_ratings["P1"] = PlayerRating(id="P1", rating_value=100.0)
93
+ gen._player_def_ratings["P1"] = PlayerRating(id="P1", rating_value=100.0)
94
+
95
+ stats_entry = {
96
+ cn.player_id: "P1",
97
+ "perf": 0.75,
98
+ cn.participation_weight: 1.0,
99
+ cn.team_players_playing_time: '{"P1": 30}',
100
+ cn.opponent_players_playing_time: '{"P3": 25}',
101
+ }
102
+
103
+ collection = gen._create_pre_match_players_collection(
104
+ r={"__PLAYER_STATS": [stats_entry]},
105
+ stats_col="__PLAYER_STATS",
106
+ day_number=1,
107
+ team_id="T1",
108
+ )
109
+
110
+ match_perf = collection.pre_match_player_ratings[0].match_performance
111
+ assert match_perf.team_players_playing_time == {"P1": 30.0}
112
+ assert match_perf.opponent_players_playing_time == {"P3": 25.0}
113
+
81
114
  assert gen._player_off_ratings["P1"].rating_value > 0
82
115
 
83
116
 
@@ -21,6 +21,16 @@ def create_dataframe(df_type, data: dict):
21
21
  return df_type(data)
22
22
 
23
23
 
24
+ def to_pandas_df(df):
25
+ if hasattr(df, "to_native"):
26
+ df = df.to_native()
27
+ if isinstance(df, pd.DataFrame):
28
+ return df
29
+ if isinstance(df, pl.DataFrame):
30
+ return df.to_pandas()
31
+ raise TypeError(f"Unsupported dataframe type: {type(df)}")
32
+
33
+
24
34
  # ============================================================================
25
35
  # Aggregation Level Tests
26
36
  # ============================================================================
@@ -104,6 +114,65 @@ def test_pwmse_aggregation_level(df_type):
104
114
  assert score >= 0
105
115
 
106
116
 
117
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
118
+ def test_aggregate_returns_grouped_frame(df_type):
119
+ """aggregate returns filtered, grouped dataframe with default sum behavior"""
120
+ df = create_dataframe(
121
+ df_type,
122
+ {
123
+ "game_id": [1, 1, 1, 1],
124
+ "player_id": [1, 2, 3, 4],
125
+ "team_id": [1, 1, 2, 2],
126
+ "pred": [0.5, 0.6, 0.3, 0.4],
127
+ "target": [0, 1, 0, 1],
128
+ },
129
+ )
130
+
131
+ scorer = MeanBiasScorer(
132
+ pred_column="pred", target="target", aggregation_level=["game_id", "team_id"]
133
+ )
134
+ aggregated = to_pandas_df(scorer.aggregate(df))
135
+
136
+ assert len(aggregated) == 2
137
+ team1 = aggregated[aggregated["team_id"] == 1].iloc[0]
138
+ team2 = aggregated[aggregated["team_id"] == 2].iloc[0]
139
+ assert abs(team1["pred"] - 1.1) < 1e-10
140
+ assert abs(team1["target"] - 1.0) < 1e-10
141
+ assert abs(team2["pred"] - 0.7) < 1e-10
142
+ assert abs(team2["target"] - 1.0) < 1e-10
143
+
144
+
145
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
146
+ def test_weighted_mean_aggregation_method(df_type):
147
+ """Weighted mean aggregation uses provided weight column"""
148
+ df = create_dataframe(
149
+ df_type,
150
+ {
151
+ "game_id": [1, 1, 1],
152
+ "team_id": [1, 1, 1],
153
+ "pred": [0.4, 0.6, 0.9],
154
+ "target": [0.5, 0.7, 0.2],
155
+ "attempts": [10, 20, 30],
156
+ },
157
+ )
158
+
159
+ scorer = MeanBiasScorer(
160
+ pred_column="pred",
161
+ target="target",
162
+ aggregation_level=["game_id", "team_id"],
163
+ aggregation_method={
164
+ "pred": ("weighted_mean", "attempts"),
165
+ "target": ("weighted_mean", "attempts"),
166
+ },
167
+ )
168
+
169
+ score = scorer.score(df)
170
+ expected_pred = (0.4 * 10 + 0.6 * 20 + 0.9 * 30) / 60
171
+ expected_target = (0.5 * 10 + 0.7 * 20 + 0.2 * 30) / 60
172
+ expected = expected_pred - expected_target
173
+ assert abs(score - expected) < 1e-10
174
+
175
+
107
176
  # ============================================================================
108
177
  # Granularity Tests (Separate Scores Per Group)
109
178
  # ============================================================================