spforge 0.8.26__tar.gz → 0.8.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.26/spforge.egg-info → spforge-0.8.29}/PKG-INFO +1 -1
- {spforge-0.8.26 → spforge-0.8.29}/pyproject.toml +1 -1
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/_player_rating.py +71 -24
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/player_performance_predictor.py +11 -13
- {spforge-0.8.26 → spforge-0.8.29}/spforge/scorer/_score.py +121 -0
- {spforge-0.8.26 → spforge-0.8.29/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.26 → spforge-0.8.29}/spforge.egg-info/SOURCES.txt +1 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/ratings/test_player_rating_generator.py +392 -1
- spforge-0.8.29/tests/scorer/test_scorer_name.py +292 -0
- {spforge-0.8.26 → spforge-0.8.29}/LICENSE +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/MANIFEST.in +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/README.md +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/game_level_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/lol/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/setup.cfg +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/autopipeline.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/data_structures.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_group_by_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/ratings/utils.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_other_transformer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge/utils.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/ratings/test_player_rating_no_mutation.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/ratings/test_utils_scaled_weights.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/test_autopipeline.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.26 → spforge-0.8.29}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.26
|
|
3
|
+
Version: 0.8.29
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.26"
|
|
7
|
+
version = "0.8.29"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -2,8 +2,10 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
import copy
|
|
5
|
-
import
|
|
5
|
+
import json
|
|
6
6
|
import logging
|
|
7
|
+
import math
|
|
8
|
+
from collections.abc import Mapping
|
|
7
9
|
from typing import Any, Literal
|
|
8
10
|
|
|
9
11
|
import narwhals.stable.v2 as nw
|
|
@@ -164,6 +166,18 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
164
166
|
self.start_min_match_count_team_rating = start_min_match_count_team_rating
|
|
165
167
|
self.start_hardcoded_start_rating = start_harcoded_start_rating
|
|
166
168
|
|
|
169
|
+
if hasattr(self._performance_predictor, '_reference_rating'):
|
|
170
|
+
effective_start = self.start_hardcoded_start_rating
|
|
171
|
+
|
|
172
|
+
if effective_start is None and self.start_league_ratings:
|
|
173
|
+
league_ratings = list(self.start_league_ratings.values())
|
|
174
|
+
effective_start = sum(league_ratings) / len(league_ratings)
|
|
175
|
+
|
|
176
|
+
if effective_start is None:
|
|
177
|
+
effective_start = 1000
|
|
178
|
+
|
|
179
|
+
self._performance_predictor._reference_rating = effective_start
|
|
180
|
+
|
|
167
181
|
self.team_id_change_confidence_sum_decrease = team_id_change_confidence_sum_decrease
|
|
168
182
|
self.column_names = column_names
|
|
169
183
|
|
|
@@ -515,6 +529,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
515
529
|
)
|
|
516
530
|
|
|
517
531
|
perf_value = pre_player.match_performance.performance_value
|
|
532
|
+
|
|
518
533
|
if perf_value is None:
|
|
519
534
|
off_change = 0.0
|
|
520
535
|
else:
|
|
@@ -608,6 +623,7 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
608
623
|
)
|
|
609
624
|
|
|
610
625
|
perf_value = pre_player.match_performance.performance_value
|
|
626
|
+
|
|
611
627
|
if perf_value is None:
|
|
612
628
|
off_change = 0.0
|
|
613
629
|
else:
|
|
@@ -912,10 +928,16 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
912
928
|
if cn.league and cn.league in df.columns:
|
|
913
929
|
player_stat_cols.append(cn.league)
|
|
914
930
|
|
|
915
|
-
if
|
|
931
|
+
if (
|
|
932
|
+
cn.team_players_playing_time
|
|
933
|
+
and cn.team_players_playing_time in df.columns
|
|
934
|
+
):
|
|
916
935
|
player_stat_cols.append(cn.team_players_playing_time)
|
|
917
936
|
|
|
918
|
-
if
|
|
937
|
+
if (
|
|
938
|
+
cn.opponent_players_playing_time
|
|
939
|
+
and cn.opponent_players_playing_time in df.columns
|
|
940
|
+
):
|
|
919
941
|
player_stat_cols.append(cn.opponent_players_playing_time)
|
|
920
942
|
|
|
921
943
|
df = df.with_columns(pl.struct(player_stat_cols).alias(PLAYER_STATS))
|
|
@@ -948,6 +970,40 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
948
970
|
match_df = self._add_day_number(match_df, cn.start_date, "__day_number")
|
|
949
971
|
return match_df
|
|
950
972
|
|
|
973
|
+
def _get_players_playing_time(
|
|
974
|
+
self, source: Mapping[str, Any], column_name: str | None
|
|
975
|
+
) -> dict[str, float] | None:
|
|
976
|
+
if not column_name:
|
|
977
|
+
return None
|
|
978
|
+
return self._normalize_players_playing_time(source.get(column_name))
|
|
979
|
+
|
|
980
|
+
@staticmethod
|
|
981
|
+
def _normalize_players_playing_time(raw_value: Any) -> dict[str, float] | None:
|
|
982
|
+
if raw_value is None:
|
|
983
|
+
return None
|
|
984
|
+
|
|
985
|
+
if isinstance(raw_value, str):
|
|
986
|
+
raw_text = raw_value
|
|
987
|
+
raw_value = raw_value.strip()
|
|
988
|
+
if not raw_value:
|
|
989
|
+
return None
|
|
990
|
+
try:
|
|
991
|
+
raw_value = json.loads(raw_value)
|
|
992
|
+
except json.JSONDecodeError as exc:
|
|
993
|
+
raise ValueError(
|
|
994
|
+
f"unable to parse playing time JSON {raw_text!r}: {exc}"
|
|
995
|
+
) from exc
|
|
996
|
+
|
|
997
|
+
if isinstance(raw_value, Mapping):
|
|
998
|
+
normalized: dict[str, float] = {}
|
|
999
|
+
for key, value in raw_value.items():
|
|
1000
|
+
if value is None:
|
|
1001
|
+
continue
|
|
1002
|
+
normalized[str(key)] = float(value)
|
|
1003
|
+
return normalized or None
|
|
1004
|
+
|
|
1005
|
+
return None
|
|
1006
|
+
|
|
951
1007
|
def _create_pre_match_players_collection(
|
|
952
1008
|
self, r: dict, stats_col: str, day_number: int, team_id: str
|
|
953
1009
|
) -> PreMatchPlayersCollection:
|
|
@@ -994,17 +1050,12 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
994
1050
|
else None
|
|
995
1051
|
)
|
|
996
1052
|
|
|
997
|
-
team_playing_time =
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
if cn.opponent_players_playing_time:
|
|
1005
|
-
raw_value = team_player.get(cn.opponent_players_playing_time)
|
|
1006
|
-
if raw_value is not None:
|
|
1007
|
-
opponent_playing_time = raw_value
|
|
1053
|
+
team_playing_time = self._get_players_playing_time(
|
|
1054
|
+
team_player, cn.team_players_playing_time
|
|
1055
|
+
)
|
|
1056
|
+
opponent_playing_time = self._get_players_playing_time(
|
|
1057
|
+
team_player, cn.opponent_players_playing_time
|
|
1058
|
+
)
|
|
1008
1059
|
|
|
1009
1060
|
mp = MatchPerformance(
|
|
1010
1061
|
performance_value=perf_val,
|
|
@@ -1245,16 +1296,12 @@ class PlayerRatingGenerator(RatingGenerator):
|
|
|
1245
1296
|
ppw = pw
|
|
1246
1297
|
proj_w.append(float(ppw))
|
|
1247
1298
|
|
|
1248
|
-
team_playing_time =
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
if cn.opponent_players_playing_time:
|
|
1255
|
-
raw_value = tp.get(cn.opponent_players_playing_time)
|
|
1256
|
-
if raw_value is not None:
|
|
1257
|
-
opponent_playing_time = raw_value
|
|
1299
|
+
team_playing_time = self._get_players_playing_time(
|
|
1300
|
+
tp, cn.team_players_playing_time
|
|
1301
|
+
)
|
|
1302
|
+
opponent_playing_time = self._get_players_playing_time(
|
|
1303
|
+
tp, cn.opponent_players_playing_time
|
|
1304
|
+
)
|
|
1258
1305
|
|
|
1259
1306
|
mp = MatchPerformance(
|
|
1260
1307
|
performance_value=get_perf_value(tp),
|
|
@@ -31,6 +31,7 @@ class PlayerPerformancePredictor(ABC):
|
|
|
31
31
|
pass
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
|
|
34
35
|
class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
|
|
35
36
|
|
|
36
37
|
def __init__(
|
|
@@ -38,18 +39,22 @@ class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
|
|
|
38
39
|
coef: float = 0.0015,
|
|
39
40
|
last_sample_count: int = 1500,
|
|
40
41
|
min_count_for_historical_average: int = 200,
|
|
41
|
-
historical_average_value_default: float = 1000,
|
|
42
42
|
):
|
|
43
43
|
self.coef = coef
|
|
44
44
|
self.last_sample_count = last_sample_count
|
|
45
45
|
self.min_count_for_historical_average = min_count_for_historical_average
|
|
46
|
-
self.historical_average_value_default = historical_average_value_default
|
|
47
46
|
if self.min_count_for_historical_average < 1:
|
|
48
47
|
raise ValueError("min_count_for_historical_average must be positive")
|
|
49
|
-
self.
|
|
48
|
+
self._reference_rating: float | None = None
|
|
50
49
|
|
|
51
50
|
def reset(self):
|
|
52
|
-
|
|
51
|
+
pass
|
|
52
|
+
|
|
53
|
+
def _get_reference_rating(self) -> float:
|
|
54
|
+
"""Get reference rating from rating generator, or default to 1000."""
|
|
55
|
+
if self._reference_rating is not None:
|
|
56
|
+
return self._reference_rating
|
|
57
|
+
return 1000
|
|
53
58
|
|
|
54
59
|
def predict_performance(
|
|
55
60
|
self,
|
|
@@ -57,21 +62,14 @@ class PlayerRatingNonOpponentPerformancePredictor(PlayerPerformancePredictor):
|
|
|
57
62
|
opponent_team_rating: PreMatchTeamRating,
|
|
58
63
|
team_rating: PreMatchTeamRating,
|
|
59
64
|
) -> float:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if len(recent_prev_entries_ratings) > self.min_count_for_historical_average:
|
|
63
|
-
historical_average_rating = sum(recent_prev_entries_ratings) / len(
|
|
64
|
-
recent_prev_entries_ratings
|
|
65
|
-
)
|
|
66
|
-
else:
|
|
67
|
-
historical_average_rating = self.historical_average_value_default
|
|
65
|
+
historical_average_rating = self._get_reference_rating()
|
|
66
|
+
|
|
68
67
|
net_mean_rating_over_historical_average = (
|
|
69
68
|
player_rating.rating_value - historical_average_rating
|
|
70
69
|
)
|
|
71
70
|
|
|
72
71
|
value = self.coef * net_mean_rating_over_historical_average
|
|
73
72
|
prediction = (math.exp(value)) / (1 + math.exp(value))
|
|
74
|
-
self._prev_entries_ratings.append(player_rating.rating_value)
|
|
75
73
|
|
|
76
74
|
return prediction
|
|
77
75
|
|
|
@@ -267,6 +267,7 @@ class BaseScorer(ABC):
|
|
|
267
267
|
granularity: list[str] | None = None,
|
|
268
268
|
compare_to_naive: bool = False,
|
|
269
269
|
naive_granularity: list[str] | None = None,
|
|
270
|
+
_name_override: str | None = None,
|
|
270
271
|
):
|
|
271
272
|
"""
|
|
272
273
|
:param target: The column name of the target
|
|
@@ -277,6 +278,9 @@ class BaseScorer(ABC):
|
|
|
277
278
|
:param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
|
|
278
279
|
:param aggregation_method: Aggregation methods for pred/target when aggregation_level is set.
|
|
279
280
|
:param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
|
|
281
|
+
:param compare_to_naive: If True, returns naive_score - model_score (improvement over naive baseline)
|
|
282
|
+
:param naive_granularity: Granularity for computing naive baseline predictions
|
|
283
|
+
:param _name_override: Override auto-generated name (internal use)
|
|
280
284
|
"""
|
|
281
285
|
self.target = target
|
|
282
286
|
self.pred_column = pred_column
|
|
@@ -295,6 +299,7 @@ class BaseScorer(ABC):
|
|
|
295
299
|
self.granularity = granularity
|
|
296
300
|
self.compare_to_naive = compare_to_naive
|
|
297
301
|
self.naive_granularity = naive_granularity
|
|
302
|
+
self._name_override = _name_override
|
|
298
303
|
|
|
299
304
|
def _resolve_aggregation_method(self, key: str) -> Any:
|
|
300
305
|
if self.aggregation_method is None:
|
|
@@ -359,6 +364,98 @@ class BaseScorer(ABC):
|
|
|
359
364
|
mask = col_mask if mask is None else (mask & col_mask)
|
|
360
365
|
return df.filter(mask)
|
|
361
366
|
|
|
367
|
+
def _get_scorer_id(self) -> str:
|
|
368
|
+
"""Get scorer-specific identifier in snake_case. Override in subclasses if needed."""
|
|
369
|
+
import re
|
|
370
|
+
name = self.__class__.__name__
|
|
371
|
+
# Check if name is all uppercase (acronym like PWMSE)
|
|
372
|
+
if name.isupper():
|
|
373
|
+
return name.lower()
|
|
374
|
+
# Otherwise use regular snake_case conversion
|
|
375
|
+
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()
|
|
376
|
+
|
|
377
|
+
def _format_column_list(self, columns: list[str], max_display: int = 3) -> str:
|
|
378
|
+
"""Format column list with abbreviation for long lists."""
|
|
379
|
+
if len(columns) <= max_display:
|
|
380
|
+
return "+".join(columns)
|
|
381
|
+
shown = "+".join(columns[:max_display])
|
|
382
|
+
remaining = len(columns) - max_display
|
|
383
|
+
return f"{shown}+{remaining}more"
|
|
384
|
+
|
|
385
|
+
def _sanitize_column_name(self, name: str) -> str:
|
|
386
|
+
"""Replace special characters with underscores."""
|
|
387
|
+
import re
|
|
388
|
+
return re.sub(r'[^a-zA-Z0-9_]', '_', name)
|
|
389
|
+
|
|
390
|
+
def _count_user_filters(self) -> int:
|
|
391
|
+
"""Count filters excluding auto-added validation filter."""
|
|
392
|
+
if not self.filters:
|
|
393
|
+
return 0
|
|
394
|
+
if self.validation_column is None:
|
|
395
|
+
return len(self.filters)
|
|
396
|
+
count = 0
|
|
397
|
+
for f in self.filters:
|
|
398
|
+
if not (f.column_name == self.validation_column and
|
|
399
|
+
f.operator == Operator.EQUALS and
|
|
400
|
+
f.value == 1):
|
|
401
|
+
count += 1
|
|
402
|
+
return count
|
|
403
|
+
|
|
404
|
+
def _generate_name(self) -> str:
|
|
405
|
+
"""Generate readable name from scorer configuration."""
|
|
406
|
+
parts = []
|
|
407
|
+
|
|
408
|
+
parts.append(self._get_scorer_id())
|
|
409
|
+
|
|
410
|
+
parts.append(self._sanitize_column_name(self.target))
|
|
411
|
+
|
|
412
|
+
if self.granularity:
|
|
413
|
+
gran_str = self._format_column_list(self.granularity)
|
|
414
|
+
parts.append(f"gran:{gran_str}")
|
|
415
|
+
|
|
416
|
+
if self.compare_to_naive:
|
|
417
|
+
if self.naive_granularity:
|
|
418
|
+
naive_str = self._format_column_list(self.naive_granularity)
|
|
419
|
+
parts.append(f"naive:{naive_str}")
|
|
420
|
+
else:
|
|
421
|
+
parts.append("naive")
|
|
422
|
+
|
|
423
|
+
if self.aggregation_level:
|
|
424
|
+
agg_str = self._format_column_list(self.aggregation_level)
|
|
425
|
+
parts.append(f"agg:{agg_str}")
|
|
426
|
+
|
|
427
|
+
filter_count = self._count_user_filters()
|
|
428
|
+
if filter_count > 0:
|
|
429
|
+
parts.append(f"filters:{filter_count}")
|
|
430
|
+
|
|
431
|
+
return "_".join(parts)
|
|
432
|
+
|
|
433
|
+
@property
|
|
434
|
+
def name(self) -> str:
|
|
435
|
+
"""
|
|
436
|
+
Generate a human-readable name for this scorer.
|
|
437
|
+
|
|
438
|
+
Returns descriptive name based on scorer configuration including
|
|
439
|
+
target, granularity, naive comparison, aggregation, and filters.
|
|
440
|
+
Only includes components that are actually set (non-None/non-empty).
|
|
441
|
+
|
|
442
|
+
Format: {scorer_id}_{target}[_gran:{cols}][_naive[:cols]][_agg:{cols}][_filters:{n}]
|
|
443
|
+
|
|
444
|
+
Can be overridden by passing _name_override to constructor.
|
|
445
|
+
|
|
446
|
+
Examples:
|
|
447
|
+
>>> scorer = MeanBiasScorer(target="points", pred_column="pred")
|
|
448
|
+
>>> scorer.name
|
|
449
|
+
'mean_bias_scorer_points'
|
|
450
|
+
|
|
451
|
+
>>> scorer = MeanBiasScorer(target="points", granularity=["team_id"], compare_to_naive=True)
|
|
452
|
+
>>> scorer.name
|
|
453
|
+
'mean_bias_scorer_points_gran:team_id_naive'
|
|
454
|
+
"""
|
|
455
|
+
if hasattr(self, '_name_override') and self._name_override is not None:
|
|
456
|
+
return self._name_override
|
|
457
|
+
return self._generate_name()
|
|
458
|
+
|
|
362
459
|
@abstractmethod
|
|
363
460
|
def score(self, df: IntoFrameT) -> float | dict[tuple, float]:
|
|
364
461
|
"""
|
|
@@ -385,6 +482,7 @@ class PWMSE(BaseScorer):
|
|
|
385
482
|
compare_to_naive: bool = False,
|
|
386
483
|
naive_granularity: list[str] | None = None,
|
|
387
484
|
evaluation_labels: list[int] | None = None,
|
|
485
|
+
_name_override: str | None = None,
|
|
388
486
|
):
|
|
389
487
|
self.pred_column_name = pred_column
|
|
390
488
|
super().__init__(
|
|
@@ -397,6 +495,7 @@ class PWMSE(BaseScorer):
|
|
|
397
495
|
validation_column=validation_column,
|
|
398
496
|
compare_to_naive=compare_to_naive,
|
|
399
497
|
naive_granularity=naive_granularity,
|
|
498
|
+
_name_override=_name_override,
|
|
400
499
|
)
|
|
401
500
|
self.labels = labels
|
|
402
501
|
self.evaluation_labels = evaluation_labels
|
|
@@ -553,6 +652,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
553
652
|
labels: list[int] | None = None,
|
|
554
653
|
compare_to_naive: bool = False,
|
|
555
654
|
naive_granularity: list[str] | None = None,
|
|
655
|
+
_name_override: str | None = None,
|
|
556
656
|
):
|
|
557
657
|
"""
|
|
558
658
|
:param pred_column: The column name of the predictions
|
|
@@ -563,6 +663,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
563
663
|
:param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
|
|
564
664
|
:param filters: The filters to apply before calculating
|
|
565
665
|
:param labels: The labels corresponding to each index in probability distributions (e.g., [-5, -4, ..., 35] for rush yards)
|
|
666
|
+
:param _name_override: Override auto-generated name (internal use)
|
|
566
667
|
"""
|
|
567
668
|
|
|
568
669
|
self.pred_column_name = pred_column
|
|
@@ -577,6 +678,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
577
678
|
validation_column=validation_column,
|
|
578
679
|
compare_to_naive=compare_to_naive,
|
|
579
680
|
naive_granularity=naive_granularity,
|
|
681
|
+
_name_override=_name_override,
|
|
580
682
|
)
|
|
581
683
|
|
|
582
684
|
def _mean_bias_score(self, df: IntoFrameT) -> float:
|
|
@@ -691,6 +793,7 @@ class SklearnScorer(BaseScorer):
|
|
|
691
793
|
params: dict[str, Any] = None,
|
|
692
794
|
compare_to_naive: bool = False,
|
|
693
795
|
naive_granularity: list[str] | None = None,
|
|
796
|
+
_name_override: str | None = None,
|
|
694
797
|
):
|
|
695
798
|
"""
|
|
696
799
|
:param pred_column: The column name of the predictions
|
|
@@ -701,6 +804,7 @@ class SklearnScorer(BaseScorer):
|
|
|
701
804
|
:param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
|
|
702
805
|
:param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
|
|
703
806
|
:param filters: The filters to apply before calculating
|
|
807
|
+
:param _name_override: Override auto-generated name (internal use)
|
|
704
808
|
"""
|
|
705
809
|
|
|
706
810
|
super().__init__(
|
|
@@ -713,11 +817,22 @@ class SklearnScorer(BaseScorer):
|
|
|
713
817
|
validation_column=validation_column,
|
|
714
818
|
compare_to_naive=compare_to_naive,
|
|
715
819
|
naive_granularity=naive_granularity,
|
|
820
|
+
_name_override=_name_override,
|
|
716
821
|
)
|
|
717
822
|
self.pred_column_name = pred_column
|
|
718
823
|
self.scorer_function = scorer_function
|
|
719
824
|
self.params = params or {}
|
|
720
825
|
|
|
826
|
+
def _get_scorer_id(self) -> str:
|
|
827
|
+
"""Use the scorer function name."""
|
|
828
|
+
if hasattr(self.scorer_function, '__name__'):
|
|
829
|
+
name = self.scorer_function.__name__
|
|
830
|
+
# Handle lambda functions
|
|
831
|
+
if name == '<lambda>':
|
|
832
|
+
return "custom_metric"
|
|
833
|
+
return name
|
|
834
|
+
return "custom_metric"
|
|
835
|
+
|
|
721
836
|
def _pad_probabilities(
|
|
722
837
|
self, y_true: list[Any], probabilities: list[list[float]]
|
|
723
838
|
) -> tuple[list[list[float]], dict[str, Any]]:
|
|
@@ -827,6 +942,7 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
827
942
|
filters: list[Filter] | None = None,
|
|
828
943
|
compare_to_naive: bool = False,
|
|
829
944
|
naive_granularity: list[str] | None = None,
|
|
945
|
+
_name_override: str | None = None,
|
|
830
946
|
):
|
|
831
947
|
|
|
832
948
|
self.pred_column_name = pred_column
|
|
@@ -841,6 +957,7 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
841
957
|
validation_column=validation_column,
|
|
842
958
|
compare_to_naive=compare_to_naive,
|
|
843
959
|
naive_granularity=naive_granularity,
|
|
960
|
+
_name_override=_name_override,
|
|
844
961
|
)
|
|
845
962
|
|
|
846
963
|
def _aggregate_pandas_series(
|
|
@@ -1064,6 +1181,7 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
1064
1181
|
labels: list[int] | None = None,
|
|
1065
1182
|
compare_to_naive: bool = False,
|
|
1066
1183
|
naive_granularity: list[str] | None = None,
|
|
1184
|
+
_name_override: str | None = None,
|
|
1067
1185
|
):
|
|
1068
1186
|
self.pred_column_name = pred_column
|
|
1069
1187
|
super().__init__(
|
|
@@ -1076,6 +1194,7 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
1076
1194
|
validation_column=validation_column,
|
|
1077
1195
|
compare_to_naive=compare_to_naive,
|
|
1078
1196
|
naive_granularity=naive_granularity,
|
|
1197
|
+
_name_override=_name_override,
|
|
1079
1198
|
)
|
|
1080
1199
|
self.classes = classes
|
|
1081
1200
|
|
|
@@ -1263,6 +1382,7 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1263
1382
|
filters: list["Filter"] | None = None,
|
|
1264
1383
|
compare_to_naive: bool = False,
|
|
1265
1384
|
naive_granularity: list[str] | None = None,
|
|
1385
|
+
_name_override: str | None = None,
|
|
1266
1386
|
):
|
|
1267
1387
|
self.pred_column_name = dist_column
|
|
1268
1388
|
super().__init__(
|
|
@@ -1275,6 +1395,7 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1275
1395
|
validation_column=validation_column,
|
|
1276
1396
|
compare_to_naive=compare_to_naive,
|
|
1277
1397
|
naive_granularity=naive_granularity,
|
|
1398
|
+
_name_override=_name_override,
|
|
1278
1399
|
)
|
|
1279
1400
|
|
|
1280
1401
|
self.dist_column = dist_column
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.26
|
|
3
|
+
Version: 0.8.29
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -109,6 +109,7 @@ tests/ratings/test_team_rating_generator.py
|
|
|
109
109
|
tests/ratings/test_utils_scaled_weights.py
|
|
110
110
|
tests/scorer/test_score.py
|
|
111
111
|
tests/scorer/test_score_aggregation_granularity.py
|
|
112
|
+
tests/scorer/test_scorer_name.py
|
|
112
113
|
tests/transformers/test_estimator_transformer_context.py
|
|
113
114
|
tests/transformers/test_net_over_predicted.py
|
|
114
115
|
tests/transformers/test_other_transformer.py
|