spforge 0.8.27__py3-none-any.whl → 0.8.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

spforge/ratings/_player_rating.py CHANGED
@@ -166,6 +166,18 @@ class PlayerRatingGenerator(RatingGenerator):
         self.start_min_match_count_team_rating = start_min_match_count_team_rating
         self.start_hardcoded_start_rating = start_harcoded_start_rating
 
+        if hasattr(self._performance_predictor, '_reference_rating'):
+            effective_start = self.start_hardcoded_start_rating
+
+            if effective_start is None and self.start_league_ratings:
+                league_ratings = list(self.start_league_ratings.values())
+                effective_start = sum(league_ratings) / len(league_ratings)
+
+            if effective_start is None:
+                effective_start = 1000
+
+            self._performance_predictor._reference_rating = effective_start
+
         self.team_id_change_confidence_sum_decrease = team_id_change_confidence_sum_decrease
         self.column_names = column_names
 
@@ -517,6 +529,7 @@ class PlayerRatingGenerator(RatingGenerator):
         )
 
         perf_value = pre_player.match_performance.performance_value
+
         if perf_value is None:
             off_change = 0.0
         else:
@@ -610,6 +623,7 @@ class PlayerRatingGenerator(RatingGenerator):
         )
 
         perf_value = pre_player.match_performance.performance_value
+
         if perf_value is None:
             off_change = 0.0
         else:
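
The `__init__` wiring above resolves the predictor's reference rating with a simple precedence: an explicit `start_harcoded_start_rating` wins, otherwise the mean of `start_league_ratings`, otherwise 1000. A minimal standalone sketch of that precedence (the helper name `resolve_reference_rating` is ours, not part of spforge):

```python
# Sketch of the reference-rating precedence wired into
# PlayerRatingGenerator.__init__ above; the helper is hypothetical.
def resolve_reference_rating(
    start_hardcoded_start_rating: float | None,
    start_league_ratings: dict[str, float] | None,
) -> float:
    # 1) An explicitly hardcoded start rating takes precedence.
    if start_hardcoded_start_rating is not None:
        return start_hardcoded_start_rating
    # 2) Otherwise fall back to the mean of the league start ratings.
    if start_league_ratings:
        values = list(start_league_ratings.values())
        return sum(values) / len(values)
    # 3) Final fallback matches the predictor's own default.
    return 1000.0


assert resolve_reference_rating(1200.0, {"NBA": 1100}) == 1200.0
assert resolve_reference_rating(None, {"NBA": 1100, "G-League": 900}) == 1000.0
assert resolve_reference_rating(None, None) == 1000.0
```

The new tests at the bottom of this diff assert exactly these three behaviours against the real generator.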
spforge/ratings/player_performance_predictor.py CHANGED
@@ -31,6 +31,7 @@ class PlayerPerformancePredictor(ABC):
         pass
 
 
+
 class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
 
     def __init__(
@@ -38,18 +39,22 @@ class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
         coef: float = 0.0015,
         last_sample_count: int = 1500,
         min_count_for_historical_average: int = 200,
-        historical_average_value_default: float = 1000,
     ):
         self.coef = coef
         self.last_sample_count = last_sample_count
         self.min_count_for_historical_average = min_count_for_historical_average
-        self.historical_average_value_default = historical_average_value_default
         if self.min_count_for_historical_average < 1:
             raise ValueError("min_count_for_historical_average must be positive")
-        self._prev_entries_ratings = []
+        self._reference_rating: float | None = None
 
     def reset(self):
-        self._prev_entries_ratings = []
+        pass
+
+    def _get_reference_rating(self) -> float:
+        """Get reference rating from rating generator, or default to 1000."""
+        if self._reference_rating is not None:
+            return self._reference_rating
+        return 1000
 
     def predict_performance(
         self,
@@ -57,21 +62,14 @@ class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
         opponent_team_rating: PreMatchTeamRating,
         team_rating: PreMatchTeamRating,
     ) -> float:
-        start_index = max(0, len(self._prev_entries_ratings) - self.last_sample_count)
-        recent_prev_entries_ratings = self._prev_entries_ratings[start_index:]
-        if len(recent_prev_entries_ratings) > self.min_count_for_historical_average:
-            historical_average_rating = sum(recent_prev_entries_ratings) / len(
-                recent_prev_entries_ratings
-            )
-        else:
-            historical_average_rating = self.historical_average_value_default
+        historical_average_rating = self._get_reference_rating()
+
         net_mean_rating_over_historical_average = (
             player_rating.rating_value - historical_average_rating
         )
 
         value = self.coef * net_mean_rating_over_historical_average
         prediction = (math.exp(value)) / (1 + math.exp(value))
-        self._prev_entries_ratings.append(player_rating.rating_value)
 
         return prediction
 
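
With the rolling average gone, the prediction is a logistic function of the gap between the player's rating and the fixed reference. A minimal sketch of that computation under the formula shown above (standalone, not the spforge class itself):

```python
import math


def predict_performance_sketch(rating_value: float,
                               reference_rating: float | None,
                               coef: float = 0.0015) -> float:
    # The fixed reference replaces the old rolling average of past ratings;
    # None falls back to the 1000 default, as in _get_reference_rating.
    historical_average = reference_rating if reference_rating is not None else 1000.0
    value = coef * (rating_value - historical_average)
    # Logistic squash: exp(v) / (1 + exp(v))
    return math.exp(value) / (1 + math.exp(value))


# A player rated exactly at the reference lands on the sigmoid midpoint;
# higher-rated players predict above 0.5, lower-rated below.
assert predict_performance_sketch(1000.0, 1000.0) == 0.5
assert predict_performance_sketch(1100.0, 1000.0) > 0.5
assert predict_performance_sketch(900.0, 1000.0) < 0.5
```

Because the reference is now fixed rather than tracking recent ratings, a population whose mean performance is not 0.5 can show up as ratings drifting away from the reference, which is exactly what the new drift tests below exercise.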
spforge/scorer/_score.py CHANGED
@@ -267,6 +267,7 @@ class BaseScorer(ABC):
         granularity: list[str] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
         """
         :param target: The column name of the target
@@ -277,6 +278,9 @@ class BaseScorer(ABC):
         :param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
         :param aggregation_method: Aggregation methods for pred/target when aggregation_level is set.
         :param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
+        :param compare_to_naive: If True, returns naive_score - model_score (improvement over naive baseline)
+        :param naive_granularity: Granularity for computing naive baseline predictions
+        :param _name_override: Override auto-generated name (internal use)
         """
         self.target = target
         self.pred_column = pred_column
@@ -295,6 +299,7 @@ class BaseScorer(ABC):
         self.granularity = granularity
         self.compare_to_naive = compare_to_naive
         self.naive_granularity = naive_granularity
+        self._name_override = _name_override
 
     def _resolve_aggregation_method(self, key: str) -> Any:
         if self.aggregation_method is None:
@@ -359,6 +364,98 @@ class BaseScorer(ABC):
             mask = col_mask if mask is None else (mask & col_mask)
         return df.filter(mask)
 
+    def _get_scorer_id(self) -> str:
+        """Get scorer-specific identifier in snake_case. Override in subclasses if needed."""
+        import re
+        name = self.__class__.__name__
+        # Check if name is all uppercase (acronym like PWMSE)
+        if name.isupper():
+            return name.lower()
+        # Otherwise use regular snake_case conversion
+        return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
+
+    def _format_column_list(self, columns: list[str], max_display: int = 3) -> str:
+        """Format column list with abbreviation for long lists."""
+        if len(columns) <= max_display:
+            return "+".join(columns)
+        shown = "+".join(columns[:max_display])
+        remaining = len(columns) - max_display
+        return f"{shown}+{remaining}more"
+
+    def _sanitize_column_name(self, name: str) -> str:
+        """Replace special characters with underscores."""
+        import re
+        return re.sub(r'[^a-zA-Z0-9_]', '_', name)
+
+    def _count_user_filters(self) -> int:
+        """Count filters excluding auto-added validation filter."""
+        if not self.filters:
+            return 0
+        if self.validation_column is None:
+            return len(self.filters)
+        count = 0
+        for f in self.filters:
+            if not (f.column_name == self.validation_column and
+                    f.operator == Operator.EQUALS and
+                    f.value == 1):
+                count += 1
+        return count
+
+    def _generate_name(self) -> str:
+        """Generate readable name from scorer configuration."""
+        parts = []
+
+        parts.append(self._get_scorer_id())
+
+        parts.append(self._sanitize_column_name(self.target))
+
+        if self.granularity:
+            gran_str = self._format_column_list(self.granularity)
+            parts.append(f"gran:{gran_str}")
+
+        if self.compare_to_naive:
+            if self.naive_granularity:
+                naive_str = self._format_column_list(self.naive_granularity)
+                parts.append(f"naive:{naive_str}")
+            else:
+                parts.append("naive")
+
+        if self.aggregation_level:
+            agg_str = self._format_column_list(self.aggregation_level)
+            parts.append(f"agg:{agg_str}")
+
+        filter_count = self._count_user_filters()
+        if filter_count > 0:
+            parts.append(f"filters:{filter_count}")
+
+        return "_".join(parts)
+
+    @property
+    def name(self) -> str:
+        """
+        Generate a human-readable name for this scorer.
+
+        Returns descriptive name based on scorer configuration including
+        target, granularity, naive comparison, aggregation, and filters.
+        Only includes components that are actually set (non-None/non-empty).
+
+        Format: {scorer_id}_{target}[_gran:{cols}][_naive[:cols]][_agg:{cols}][_filters:{n}]
+
+        Can be overridden by passing _name_override to constructor.
+
+        Examples:
+            >>> scorer = MeanBiasScorer(target="points", pred_column="pred")
+            >>> scorer.name
+            'mean_bias_scorer_points'
+
+            >>> scorer = MeanBiasScorer(target="points", granularity=["team_id"], compare_to_naive=True)
+            >>> scorer.name
+            'mean_bias_scorer_points_gran:team_id_naive'
+        """
+        if hasattr(self, '_name_override') and self._name_override is not None:
+            return self._name_override
+        return self._generate_name()
+
     @abstractmethod
     def score(self, df: IntoFrameT) -> float | dict[tuple, float]:
         """
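
The examples below, lifted from the docstring and the new tests, show how the generated names compose; only components that are actually set appear in the name:

```python
from spforge.scorer import MeanBiasScorer

scorer = MeanBiasScorer(target="points", pred_column="pred")
assert scorer.name == "mean_bias_scorer_points"

# Components are appended in a fixed order: gran, naive, agg, filters.
scorer = MeanBiasScorer(
    target="points",
    pred_column="pred",
    granularity=["game_id", "team_id"],
    compare_to_naive=True,
)
assert scorer.name == "mean_bias_scorer_points_gran:game_id+team_id_naive"
```

Note that `pred_column` does not participate in the name, so two scorers that differ only in prediction column share a name (asserted by `test_same_config_same_name` below).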
@@ -385,6 +482,7 @@ class PWMSE(BaseScorer):
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
         evaluation_labels: list[int] | None = None,
+        _name_override: str | None = None,
     ):
         self.pred_column_name = pred_column
         super().__init__(
@@ -397,6 +495,7 @@ class PWMSE(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
         self.labels = labels
         self.evaluation_labels = evaluation_labels
@@ -553,6 +652,7 @@ class MeanBiasScorer(BaseScorer):
         labels: list[int] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
         """
         :param pred_column: The column name of the predictions
@@ -563,6 +663,7 @@ class MeanBiasScorer(BaseScorer):
         :param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
         :param filters: The filters to apply before calculating
         :param labels: The labels corresponding to each index in probability distributions (e.g., [-5, -4, ..., 35] for rush yards)
+        :param _name_override: Override auto-generated name (internal use)
         """
 
         self.pred_column_name = pred_column
@@ -577,6 +678,7 @@ class MeanBiasScorer(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
 
     def _mean_bias_score(self, df: IntoFrameT) -> float:
@@ -691,6 +793,7 @@ class SklearnScorer(BaseScorer):
         params: dict[str, Any] = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
         """
         :param pred_column: The column name of the predictions
@@ -701,6 +804,7 @@ class SklearnScorer(BaseScorer):
         :param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
         :param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
         :param filters: The filters to apply before calculating
+        :param _name_override: Override auto-generated name (internal use)
         """
 
         super().__init__(
@@ -713,11 +817,22 @@ class SklearnScorer(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
         self.pred_column_name = pred_column
         self.scorer_function = scorer_function
         self.params = params or {}
 
+    def _get_scorer_id(self) -> str:
+        """Use the scorer function name."""
+        if hasattr(self.scorer_function, '__name__'):
+            name = self.scorer_function.__name__
+            # Handle lambda functions
+            if name == '<lambda>':
+                return "custom_metric"
+            return name
+        return "custom_metric"
+
     def _pad_probabilities(
         self, y_true: list[Any], probabilities: list[list[float]]
     ) -> tuple[list[list[float]], dict[str, Any]]:
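
`SklearnScorer` overrides `_get_scorer_id` to derive the identifier from the wrapped function rather than the class name. A short usage sketch consistent with the new tests:

```python
from sklearn.metrics import mean_absolute_error
from spforge.scorer import SklearnScorer

# Named functions contribute their __name__ to the scorer name...
scorer = SklearnScorer(target="yards", pred_column="pred",
                       scorer_function=mean_absolute_error)
assert scorer.name == "mean_absolute_error_yards"

# ...while lambdas fall back to the "custom_metric" placeholder.
scorer = SklearnScorer(target="points", pred_column="pred",
                       scorer_function=lambda y_true, y_pred: 0.0)
assert scorer.name == "custom_metric_points"
```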
@@ -827,6 +942,7 @@ class ProbabilisticMeanBias(BaseScorer):
         filters: list[Filter] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
 
         self.pred_column_name = pred_column
@@ -841,6 +957,7 @@ class ProbabilisticMeanBias(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
 
     def _aggregate_pandas_series(
@@ -1064,6 +1181,7 @@ class OrdinalLossScorer(BaseScorer):
         labels: list[int] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
         self.pred_column_name = pred_column
         super().__init__(
@@ -1076,6 +1194,7 @@ class OrdinalLossScorer(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
         self.classes = classes
 
@@ -1263,6 +1382,7 @@ class ThresholdEventScorer(BaseScorer):
         filters: list["Filter"] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        _name_override: str | None = None,
     ):
         self.pred_column_name = dist_column
         super().__init__(
@@ -1275,6 +1395,7 @@ class ThresholdEventScorer(BaseScorer):
             validation_column=validation_column,
             compare_to_naive=compare_to_naive,
             naive_granularity=naive_granularity,
+            _name_override=_name_override,
         )
 
         self.dist_column = dist_column
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.27
+Version: 0.8.29
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
@@ -51,18 +51,18 @@ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI
 spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
 spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
 spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
-spforge/ratings/_player_rating.py,sha256=TDw0LM-sLn27fprUhOW5csaDqAhzagoVm8SPKipZZmg,61106
+spforge/ratings/_player_rating.py,sha256=y6i7vv3RcNpYmcjBelu_lJXQmZQ4kOgswXeRwyc7ieY,61621
 spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
 spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
 spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
 spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
-spforge/ratings/player_performance_predictor.py,sha256=UPzOEbougHT6FcmOiuTa3vEM6q8FZq-SjKb0AqD0JS4,8365
+spforge/ratings/player_performance_predictor.py,sha256=GtPpYlALgbQk8YHeaiRbpRvJHxeAhKpRxsaVUc9zR5o,7963
 spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
 spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
 spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
 spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
-spforge/scorer/_score.py,sha256=jZPaKiSpb14Lzec4HWrK3CfpkqPaN33mriHUZosRozM,60566
+spforge/scorer/_score.py,sha256=rGbzTiiS0KVbsGgJ742JoLDEoK79LbgTTuas6XHSZpw,65370
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
 spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
 spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.27.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+spforge-0.8.29.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -94,20 +94,21 @@ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGF
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
 tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
 tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
-tests/ratings/test_player_rating_generator.py,sha256=tpU83Orw1nlus29a0s9vc1pghL-f2rs642viW_6wFgk,83633
+tests/ratings/test_player_rating_generator.py,sha256=gfNb2OcxGbs9MrPNZj_ShBk5VwLHNxXliUF9bsrvHcE,96836
 tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
 tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
 tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
 tests/scorer/test_score_aggregation_granularity.py,sha256=O5TRlG9UE4NBpF0tL_ywZKDmkMIorwrxgTegQ75Tr7A,15871
+tests/scorer/test_scorer_name.py,sha256=puwlfy_tdtFUfcWdRqUNJcytSIDlbBnksFbqqXHgOBg,10347
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
 tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.27.dist-info/METADATA,sha256=Bl0sOhG9rDGmQpTThxwNPlAKyXeR6dCxWlGmbHH0LN0,20048
-spforge-0.8.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-spforge-0.8.27.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
-spforge-0.8.27.dist-info/RECORD,,
+spforge-0.8.29.dist-info/METADATA,sha256=T-ruW0iWlC_xBOBpvNf6lBI55ErBY5clTPmkahthCLI,20048
+spforge-0.8.29.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.29.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.29.dist-info/RECORD,,
tests/ratings/test_player_rating_generator.py CHANGED
@@ -4,7 +4,12 @@ import polars as pl
 import pytest
 
 from spforge import ColumnNames
-from spforge.data_structures import PlayerRating, RatingState
+from spforge.data_structures import (
+    MatchPerformance,
+    PlayerRating,
+    PreMatchPlayerRating,
+    PreMatchTeamRating,
+)
 from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures, RatingUnknownFeatures
 
 
@@ -138,6 +143,60 @@ def test_fit_transform_participation_weight_scaling(base_cn):
     assert half_rating > 0
 
 
+def test_player_rating_generator_team_rating_coef_affects_predictor(base_cn):
+    """Passing a team rating coefficient should affect the predictor used by the generator."""
+    generator = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="difference",
+        team_rating_diff_coef=0.5,
+        rating_diff_coef=0.0,
+        rating_diff_team_from_entity_coef=0.0,
+    )
+
+    predictor = generator._performance_predictor
+    match_perf = MatchPerformance(
+        performance_value=0.5,
+        participation_weight=1.0,
+        projected_participation_weight=1.0,
+    )
+    player_rating = PreMatchPlayerRating(
+        id="P1",
+        rating_value=100.0,
+        games_played=1,
+        league=None,
+        position=None,
+        match_performance=match_perf,
+    )
+    opponent_team_rating = PreMatchTeamRating(id="Opp", players=[], rating_value=100.0)
+    high_team_rating = PreMatchTeamRating(
+        id="TeamHigh",
+        players=[player_rating],
+        rating_value=110.0,
+    )
+    low_team_rating = PreMatchTeamRating(
+        id="TeamLow",
+        players=[player_rating],
+        rating_value=90.0,
+    )
+
+    high_pred = predictor.predict_performance(
+        player_rating=player_rating,
+        opponent_team_rating=opponent_team_rating,
+        team_rating=high_team_rating,
+    )
+    low_pred = predictor.predict_performance(
+        player_rating=player_rating,
+        opponent_team_rating=opponent_team_rating,
+        team_rating=low_team_rating,
+    )
+
+    assert predictor.team_rating_diff_coef == 0.5
+    assert high_pred > low_pred
+    assert high_pred > 0.5
+    assert low_pred < 0.5
+
+
 def test_fit_transform_batch_update_logic(base_cn):
     """Test that ratings do not update between matches if update_match_id is the same."""
     from dataclasses import replace
@@ -2352,3 +2411,302 @@ def test_fit_transform_backward_compatible_without_playing_time_columns(base_cn)
     # Ratings should be updated normally
     assert gen._player_off_ratings["P1"].rating_value != 1000.0
     assert gen._player_off_ratings["P3"].rating_value > gen._player_off_ratings["P4"].rating_value
+
+
+def test_fit_transform_ignore_opponent_predictor_adapts_to_performance_drift(base_cn):
+    """
+    Test that PlayerRatingNonOpponentPerformancePredictor converges to actual
+    performance with a fixed reference (not stuck at 0.5 like the rolling average).
+
+    With pre-scaled data (mean=0.48 ≠ 0.5), predictions should converge to 0.48,
+    not stay stuck at 0.5. This verifies that the fixed reference allows convergence.
+    """
+    import numpy as np
+
+    np.random.seed(42)  # Reproducible test
+    n_matches = 1500
+    n_players_per_team = 5
+    n_teams = 2
+
+    # Target mean intentionally NOT 0.5 to test convergence
+    target_mean = 0.48
+
+    data = {
+        "pid": [],
+        "tid": [],
+        "mid": [],
+        "dt": [],
+        "perf": [],
+        "pw": [],
+    }
+
+    match_id = 0
+    for i in range(n_matches // 2):
+        date = datetime(2019, 1, 1) + timedelta(days=i * 2)
+        date_str = date.strftime("%Y-%m-%d")
+
+        # Generate performance data already in [0,1] with mean at target.
+        # Small std keeps values tightly around the target mean.
+        for team_idx in range(n_teams):
+            team_id = f"T{team_idx + 1}"
+            for player_idx in range(n_players_per_team):
+                player_id = f"P{team_idx}_{player_idx}"
+                # Draw from normal distribution, clip to [0,1]
+                perf = np.random.normal(target_mean, 0.08)
+                perf = max(0.0, min(1.0, perf))
+
+                data["pid"].append(player_id)
+                data["tid"].append(team_id)
+                data["mid"].append(f"M{match_id}")
+                data["dt"].append(date_str)
+                data["perf"].append(perf)
+                data["pw"].append(1.0)
+
+        match_id += 1
+
+    df = pl.DataFrame(data)
+
+    # Verify input data has mean ≠ 0.5 (before any scaling)
+    input_mean = sum(data["perf"]) / len(data["perf"])
+    assert abs(input_mean - target_mean) < 0.01, f"Input data mean should be ~{target_mean}"
+
+    # Use the ignore_opponent predictor with a fixed reference.
+    # CRITICAL: auto_scale_performance=False to preserve the input mean.
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=False,  # Keep input mean at 0.48
+        start_harcoded_start_rating=1000.0,
+        rating_change_multiplier_offense=100,  # Faster convergence for test
+        rating_change_multiplier_defense=100,
+        non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_PERFORMANCE],
+    )
+
+    result = gen.fit_transform(df)
+
+    # Check the tail (after the convergence period)
+    tail_start_match = (n_matches // 2) - 200
+    tail_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) >= tail_start_match
+    )
+
+    tail_actual = tail_df["perf"].to_list()
+    tail_preds = tail_df["player_predicted_performance_perf"].to_list()
+
+    mean_actual = sum(tail_actual) / len(tail_actual)
+    mean_pred = sum(tail_preds) / len(tail_preds)
+
+    # With a fixed reference, predictions should converge close to the actual mean
+    deviation = abs(mean_pred - mean_actual)
+    assert deviation < 0.015, (
+        f"Mean predicted performance {mean_pred:.4f} deviates from "
+        f"actual mean {mean_actual:.4f} by {deviation:.4f}. "
+        f"With a fixed reference, predictions should converge to the actual performance mean."
+    )
+
+    # Verify we're not stuck at 0.5 (the original rolling average bug)
+    assert abs(mean_pred - 0.5) > 0.01, (
+        f"Mean predicted performance {mean_pred:.4f} is too close to 0.5. "
+        f"System appears stuck at the sigmoid midpoint (original rolling average bug)."
+    )
+
+
+def test_fit_transform_ignore_opponent_with_autoscale_and_temporal_drift(base_cn):
+    """
+    Test that the fixed reference works with auto_scale_performance=True and temporal drift.
+
+    With balanced data (overall mean=0.5) and temporal drift (early=0.505, late=0.495):
+    - auto_scale preserves the overall mean at 0.5
+    - Predictions track the SCALED values (not raw 0.505/0.495)
+    - Drift is preserved (early predictions > late predictions)
+    """
+    import numpy as np
+
+    np.random.seed(42)
+    n_matches = 1000
+    n_players_per_team = 5
+
+    data = {
+        "pid": [],
+        "tid": [],
+        "mid": [],
+        "dt": [],
+        "perf": [],
+        "pw": [],
+    }
+
+    match_id = 0
+    for i in range(n_matches // 2):
+        date = datetime(2019, 1, 1) + timedelta(days=i * 2)
+        date_str = date.strftime("%Y-%m-%d")
+
+        # Temporal drift: 0.505 -> 0.495 (overall mean = 0.5)
+        progress = i / (n_matches // 2)
+        period_mean = 0.505 - (0.01 * progress)
+
+        for team_idx in range(2):
+            team_id = f"T{team_idx + 1}"
+            for player_idx in range(n_players_per_team):
+                player_id = f"P{team_idx}_{player_idx}"
+                # Add variance around the period mean
+                perf = np.random.normal(period_mean, 0.03)
+                perf = max(0.0, min(1.0, perf))
+
+                data["pid"].append(player_id)
+                data["tid"].append(team_id)
+                data["mid"].append(f"M{match_id}")
+                data["dt"].append(date_str)
+                data["perf"].append(perf)
+                data["pw"].append(1.0)
+
+        match_id += 1
+
+    df = pl.DataFrame(data)
+
+    # Verify raw data is balanced
+    raw_mean = sum(data["perf"]) / len(data["perf"])
+    assert abs(raw_mean - 0.5) < 0.01, f"Raw data should have mean ≈ 0.5, got {raw_mean}"
+
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,  # ← Key: with auto_scale
+        start_harcoded_start_rating=1000.0,
+        rating_change_multiplier_offense=100,
+        rating_change_multiplier_defense=100,
+        non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_PERFORMANCE],
+    )
+
+    result = gen.fit_transform(df)
+
+    # Check that auto_scale created the performance column
+    assert "performance__perf" in result.columns
+
+    # Get the overall scaled mean
+    all_scaled = result["performance__perf"].to_list()
+    overall_scaled_mean = sum(all_scaled) / len(all_scaled)
+
+    # Verify the overall scaled mean ≈ 0.5 (auto_scale preserves balance)
+    assert abs(overall_scaled_mean - 0.5) < 0.01, (
+        f"auto_scale should preserve the overall mean at 0.5, got {overall_scaled_mean}"
+    )
+
+    # Get the early and late periods
+    early_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) < 100
+    )
+    late_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) >= (n_matches // 2 - 100)
+    )
+
+    early_actual_scaled = early_df["performance__perf"].to_list()
+    early_preds = early_df["player_predicted_performance_perf"].to_list()
+    late_actual_scaled = late_df["performance__perf"].to_list()
+    late_preds = late_df["player_predicted_performance_perf"].to_list()
+
+    early_actual_mean = sum(early_actual_scaled) / len(early_actual_scaled)
+    early_pred_mean = sum(early_preds) / len(early_preds)
+    late_actual_mean = sum(late_actual_scaled) / len(late_actual_scaled)
+    late_pred_mean = sum(late_preds) / len(late_preds)
+
+    # Verify drift is preserved after scaling (strict bounds based on the 0.505 -> 0.495 drift)
+    assert early_actual_mean > 0.51, (
+        f"Early period should be > 0.51 after scaling, got {early_actual_mean:.4f}"
+    )
+    assert late_actual_mean < 0.49, (
+        f"Late period should be < 0.49 after scaling, got {late_actual_mean:.4f}"
+    )
+
+    # Verify the drift magnitude is significant
+    drift_magnitude = early_actual_mean - late_actual_mean
+    assert drift_magnitude > 0.02, (
+        f"Drift magnitude should be > 0.02, got {drift_magnitude:.4f}"
+    )
+
+    # Verify predictions track the SCALED values (not raw 0.505/0.495).
+    # Tolerance: 0.025 accounts for convergence lag with temporal drift.
+    early_deviation = abs(early_pred_mean - early_actual_mean)
+    late_deviation = abs(late_pred_mean - late_actual_mean)
+
+    assert early_deviation < 0.025, (
+        f"Early predictions should converge to scaled actual ({early_actual_mean:.4f}), "
+        f"got {early_pred_mean:.4f}, deviation={early_deviation:.4f}"
+    )
+    assert late_deviation < 0.025, (
+        f"Late predictions should converge to scaled actual ({late_actual_mean:.4f}), "
+        f"got {late_pred_mean:.4f}, deviation={late_deviation:.4f}"
+    )
+
+    # Verify the drift is tracked in predictions
+    assert early_pred_mean > late_pred_mean, (
+        f"Predictions should track temporal drift: early ({early_pred_mean:.4f}) > late ({late_pred_mean:.4f})"
+    )
+
+
+def test_ignore_opponent_predictor_reference_rating_set_correctly(base_cn):
+    """
+    Test that PlayerRatingNonOpponentPerformancePredictor._reference_rating
+    is set correctly from the start rating parameters.
+    """
+    # Test 1: With a hardcoded start rating
+    gen1 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_harcoded_start_rating=1100.0,
+    )
+    assert gen1._performance_predictor._reference_rating == 1100.0, (
+        f"Expected reference rating 1100.0, got {gen1._performance_predictor._reference_rating}"
+    )
+
+    # Test 2: Without a hardcoded start (should default to 1000)
+    gen2 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+    )
+    assert gen2._performance_predictor._reference_rating == 1000.0, (
+        f"Expected reference rating 1000.0, got {gen2._performance_predictor._reference_rating}"
+    )
+
+    # Test 3: With league ratings (single league)
+    gen3 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_league_ratings={"NBA": 1150},
+    )
+    assert gen3._performance_predictor._reference_rating == 1150.0, (
+        f"Expected reference rating 1150.0, got {gen3._performance_predictor._reference_rating}"
+    )
+
+    # Test 4: With multiple league ratings (should use the mean)
+    gen4 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_league_ratings={"NBA": 1100, "G-League": 900, "EuroLeague": 1000},
+    )
+    expected_mean = (1100 + 900 + 1000) / 3
+    assert gen4._performance_predictor._reference_rating == expected_mean, (
+        f"Expected reference rating {expected_mean}, got {gen4._performance_predictor._reference_rating}"
+    )
+
+    # Test 5: Hardcoded start rating takes precedence over league ratings
+    gen5 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_harcoded_start_rating=1200.0,
+        start_league_ratings={"NBA": 1100},
+    )
+    assert gen5._performance_predictor._reference_rating == 1200.0, (
+        f"Expected hardcoded start rating 1200.0 to take precedence, got {gen5._performance_predictor._reference_rating}"
+    )
tests/scorer/test_scorer_name.py ADDED
@@ -0,0 +1,292 @@
+import pytest
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+
+from spforge.scorer import (
+    Filter,
+    MeanBiasScorer,
+    Operator,
+    OrdinalLossScorer,
+    SklearnScorer,
+)
+from spforge.scorer._score import (
+    PWMSE,
+    ProbabilisticMeanBias,
+    ThresholdEventScorer,
+)
+
+
+class TestScorerNameProperty:
+    """Test the auto-generated name property for all scorers."""
+
+    def test_simple_mean_bias_scorer(self):
+        scorer = MeanBiasScorer(target="points", pred_column="pred")
+        assert scorer.name == "mean_bias_scorer_points"
+
+    def test_simple_pwmse(self):
+        scorer = PWMSE(target="goals", pred_column="pred", labels=list(range(10)))
+        assert scorer.name == "pwmse_goals"
+
+    def test_simple_ordinal_loss(self):
+        scorer = OrdinalLossScorer(target="points", pred_column="pred", classes=list(range(0, 41)))
+        assert scorer.name == "ordinal_loss_scorer_points"
+
+    def test_simple_sklearn_scorer(self):
+        scorer = SklearnScorer(target="yards", pred_column="pred", scorer_function=mean_absolute_error)
+        assert scorer.name == "mean_absolute_error_yards"
+
+    def test_simple_probabilistic_mean_bias(self):
+        scorer = ProbabilisticMeanBias(target="points", pred_column="pred")
+        assert scorer.name == "probabilistic_mean_bias_points"
+
+    def test_simple_threshold_event_scorer(self):
+        scorer = ThresholdEventScorer(
+            dist_column="dist",
+            threshold_column="threshold",
+            outcome_column="outcome",
+            labels=list(range(10))
+        )
+        assert scorer.name == "threshold_event_scorer___event__"
+
+    def test_with_single_granularity(self):
+        scorer = MeanBiasScorer(target="points", pred_column="pred", granularity=["team_id"])
+        assert scorer.name == "mean_bias_scorer_points_gran:team_id"
+
+    def test_with_multiple_granularity(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["game_id", "team_id"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_gran:game_id+team_id"
+
+    def test_with_long_granularity_abbreviated(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["col1", "col2", "col3", "col4", "col5"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_gran:col1+col2+col3+2more"
+
+    def test_with_naive_comparison_no_granularity(self):
+        scorer = SklearnScorer(
+            target="goals",
+            pred_column="pred",
+            scorer_function=mean_absolute_error,
+            compare_to_naive=True
+        )
+        assert scorer.name == "mean_absolute_error_goals_naive"
+
+    def test_with_naive_comparison_with_naive_granularity(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            compare_to_naive=True,
+            naive_granularity=["season"]
+        )
+        assert scorer.name == "mean_bias_scorer_yards_naive:season"
+
+    def test_with_aggregation_level(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            aggregation_level=["game_id", "player_id"]
+        )
+        assert scorer.name == "mean_bias_scorer_yards_agg:game_id+player_id"
+
+    def test_with_user_filters_only(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            filters=[
+                Filter("minutes", 0, Operator.GREATER_THAN),
+                Filter("position", "QB", Operator.EQUALS)
+            ]
+        )
+        assert scorer.name == "mean_bias_scorer_yards_filters:2"
+
+    def test_validation_column_not_counted_in_filters(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            validation_column="is_valid",
+            filters=[Filter("minutes", 0, Operator.GREATER_THAN)]
+        )
+        # Should only count the minutes filter, not the auto-added validation filter
+        assert scorer.name == "mean_bias_scorer_yards_filters:1"
+
+    def test_validation_column_alone_not_shown(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            validation_column="is_valid"
+        )
+        # Validation filter auto-added but not counted
+        assert scorer.name == "mean_bias_scorer_yards"
+
+    def test_complex_configuration_all_components(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            granularity=["game_id", "team_id"],
+            compare_to_naive=True,
+            naive_granularity=["season"],
+            aggregation_level=["game_id", "player_id"],
+            filters=[Filter("minutes", 0, Operator.GREATER_THAN)]
+        )
+        assert scorer.name == "mean_bias_scorer_yards_gran:game_id+team_id_naive:season_agg:game_id+player_id_filters:1"
+
+    def test_sklearn_with_different_function(self):
+        scorer = SklearnScorer(
+            target="points",
+            pred_column="pred",
+            scorer_function=mean_squared_error
+        )
+        assert scorer.name == "mean_squared_error_points"
+
+    def test_sklearn_with_lambda_fallback(self):
+        scorer = SklearnScorer(
+            target="points",
+            pred_column="pred",
+            scorer_function=lambda y_true, y_pred: 0.0
+        )
+        assert scorer.name == "custom_metric_points"
+
+    def test_special_characters_sanitized(self):
+        scorer = MeanBiasScorer(target="points-per-game", pred_column="pred")
+        assert scorer.name == "mean_bias_scorer_points_per_game"
+
+    def test_special_characters_in_target_sanitized(self):
+        scorer = MeanBiasScorer(target="pass/run_ratio", pred_column="pred")
+        assert scorer.name == "mean_bias_scorer_pass_run_ratio"
+
+    def test_name_override(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["team_id"],
+            _name_override="custom_name"
+        )
+        assert scorer.name == "custom_name"
+
+    def test_consistency_across_repeated_calls(self):
+        scorer = MeanBiasScorer(
+            target="yards",
+            pred_column="pred",
+            granularity=["game_id"],
+            compare_to_naive=True
+        )
+        name1 = scorer.name
+        name2 = scorer.name
+        name3 = scorer.name
+        assert name1 == name2 == name3
+
+    def test_different_scorers_different_names(self):
+        scorer1 = MeanBiasScorer(target="points", pred_column="pred")
+        scorer2 = PWMSE(target="points", pred_column="pred", labels=list(range(10)))
+        assert scorer1.name != scorer2.name
+
+    def test_same_config_same_name(self):
+        scorer1 = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["team_id"]
+        )
+        scorer2 = MeanBiasScorer(
+            target="points",
+            pred_column="pred_2",  # Different pred column shouldn't affect name
+            granularity=["team_id"]
+        )
+        assert scorer1.name == scorer2.name
+
+    def test_none_granularity_excluded(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=None
+        )
+        assert "gran:" not in scorer.name
+        assert scorer.name == "mean_bias_scorer_points"
+
+    def test_empty_filters_excluded(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            filters=[]
+        )
+        assert "filters:" not in scorer.name
+        assert scorer.name == "mean_bias_scorer_points"
+
+    def test_none_aggregation_level_excluded(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            aggregation_level=None
+        )
+        assert "agg:" not in scorer.name
+        assert scorer.name == "mean_bias_scorer_points"
+
+    def test_pwmse_with_all_components(self):
+        scorer = PWMSE(
+            target="goals",
+            pred_column="pred",
+            labels=list(range(10)),
+            granularity=["team_id"],
+            compare_to_naive=True,
+            naive_granularity=["season"],
+            aggregation_level=["game_id"],
+            filters=[Filter("minutes", 20, Operator.GREATER_THAN)]
+        )
+        assert scorer.name == "pwmse_goals_gran:team_id_naive:season_agg:game_id_filters:1"
+
+    def test_ordinal_loss_with_granularity(self):
+        scorer = OrdinalLossScorer(
+            target="points",
+            pred_column="pred",
+            classes=list(range(0, 41)),
+            granularity=["game_id"]
+        )
+        assert scorer.name == "ordinal_loss_scorer_points_gran:game_id"
+
+    def test_threshold_event_scorer_with_components(self):
+        scorer = ThresholdEventScorer(
+            dist_column="dist",
+            threshold_column="threshold",
+            outcome_column="outcome",
+            labels=list(range(10)),
+            granularity=["game_id"],
+            compare_to_naive=True
+        )
+        assert scorer.name == "threshold_event_scorer___event___gran:game_id_naive"
+
+    def test_long_aggregation_abbreviated(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            aggregation_level=["a", "b", "c", "d", "e"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_agg:a+b+c+2more"
+
+    def test_long_naive_granularity_abbreviated(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            compare_to_naive=True,
+            naive_granularity=["a", "b", "c", "d"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_naive:a+b+c+1more"
+
+    def test_exactly_three_columns_no_abbreviation(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["a", "b", "c"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_gran:a+b+c"
+
+    def test_four_columns_abbreviated(self):
+        scorer = MeanBiasScorer(
+            target="points",
+            pred_column="pred",
+            granularity=["a", "b", "c", "d"]
+        )
+        assert scorer.name == "mean_bias_scorer_points_gran:a+b+c+1more"