spforge 0.8.8__py3-none-any.whl → 0.8.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- spforge/autopipeline.py +169 -5
- spforge/estimator/_group_by_estimator.py +11 -3
- spforge/performance_transformers/_performance_manager.py +2 -4
- spforge/ratings/_player_rating.py +131 -28
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/ratings/utils.py +16 -6
- spforge/scorer/_score.py +42 -11
- spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.8.dist-info → spforge-0.8.18.dist-info}/METADATA +1 -1
- {spforge-0.8.8.dist-info → spforge-0.8.18.dist-info}/RECORD +20 -18
- {spforge-0.8.8.dist-info → spforge-0.8.18.dist-info}/WHEEL +1 -1
- tests/performance_transformers/test_performance_manager.py +15 -0
- tests/ratings/test_player_rating_generator.py +127 -0
- tests/ratings/test_player_rating_no_mutation.py +214 -0
- tests/ratings/test_utils_scaled_weights.py +136 -0
- tests/scorer/test_score.py +142 -0
- tests/test_autopipeline.py +336 -6
- {spforge-0.8.8.dist-info → spforge-0.8.18.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.8.dist-info → spforge-0.8.18.dist-info}/top_level.txt +0 -0
spforge/ratings/utils.py
CHANGED
|
@@ -2,6 +2,10 @@ import polars as pl
|
|
|
2
2
|
|
|
3
3
|
from spforge.data_structures import ColumnNames
|
|
4
4
|
|
|
5
|
+
# Internal column names for scaled participation weights
|
|
6
|
+
_SCALED_PW = "__scaled_participation_weight__"
|
|
7
|
+
_SCALED_PPW = "__scaled_projected_participation_weight__"
|
|
8
|
+
|
|
5
9
|
|
|
6
10
|
def add_team_rating(
|
|
7
11
|
df: pl.DataFrame,
|
|
@@ -46,11 +50,14 @@ def add_team_rating_projected(
|
|
|
46
50
|
tid = column_names.team_id
|
|
47
51
|
ppw = column_names.projected_participation_weight
|
|
48
52
|
|
|
49
|
-
if
|
|
53
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
54
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
55
|
+
|
|
56
|
+
if weight_col and weight_col in df.columns:
|
|
50
57
|
return df.with_columns(
|
|
51
58
|
(
|
|
52
|
-
(pl.col(
|
|
53
|
-
/ pl.col(
|
|
59
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
|
|
60
|
+
/ pl.col(weight_col).sum().over([mid, tid])
|
|
54
61
|
).alias(team_rating_out)
|
|
55
62
|
)
|
|
56
63
|
|
|
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
|
|
|
118
125
|
mid = column_names.match_id
|
|
119
126
|
ppw = column_names.projected_participation_weight
|
|
120
127
|
|
|
121
|
-
if
|
|
128
|
+
# Use scaled column if available (clipped to [0, 1]), otherwise raw column
|
|
129
|
+
weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
|
|
130
|
+
|
|
131
|
+
if weight_col and weight_col in df.columns:
|
|
122
132
|
return df.with_columns(
|
|
123
133
|
(
|
|
124
|
-
(pl.col(
|
|
125
|
-
/ pl.col(
|
|
134
|
+
(pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
|
|
135
|
+
/ pl.col(weight_col).sum().over(mid)
|
|
126
136
|
).alias(rating_mean_out)
|
|
127
137
|
)
|
|
128
138
|
|
spforge/scorer/_score.py
CHANGED
|
@@ -366,18 +366,49 @@ class PWMSE(BaseScorer):
|
|
|
366
366
|
self.labels = labels
|
|
367
367
|
self.evaluation_labels = evaluation_labels
|
|
368
368
|
|
|
369
|
+
self._needs_extension = False
|
|
370
|
+
self._needs_slicing = False
|
|
369
371
|
self._eval_indices: list[int] | None = None
|
|
372
|
+
self._extension_mapping: dict[int, int] | None = None
|
|
373
|
+
|
|
370
374
|
if self.evaluation_labels is not None and self.labels is not None:
|
|
371
|
-
|
|
372
|
-
|
|
375
|
+
training_set = set(self.labels)
|
|
376
|
+
eval_set = set(self.evaluation_labels)
|
|
377
|
+
|
|
378
|
+
if eval_set <= training_set:
|
|
379
|
+
self._needs_slicing = True
|
|
380
|
+
label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
|
|
381
|
+
self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
|
|
382
|
+
elif training_set <= eval_set:
|
|
383
|
+
self._needs_extension = True
|
|
384
|
+
eval_label_to_idx = {lbl: i for i, lbl in enumerate(self.evaluation_labels)}
|
|
385
|
+
self._extension_mapping = {
|
|
386
|
+
train_idx: eval_label_to_idx[lbl]
|
|
387
|
+
for train_idx, lbl in enumerate(self.labels)
|
|
388
|
+
}
|
|
389
|
+
else:
|
|
390
|
+
raise ValueError(
|
|
391
|
+
f"evaluation_labels must be a subset or superset of labels. "
|
|
392
|
+
f"labels={self.labels}, evaluation_labels={self.evaluation_labels}"
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
def _align_predictions(self, preds: np.ndarray) -> np.ndarray:
|
|
396
|
+
if self._needs_slicing and self._eval_indices is not None:
|
|
397
|
+
sliced = preds[:, self._eval_indices]
|
|
398
|
+
row_sums = sliced.sum(axis=1, keepdims=True)
|
|
399
|
+
row_sums = np.where(row_sums == 0, 1.0, row_sums)
|
|
400
|
+
return sliced / row_sums
|
|
401
|
+
|
|
402
|
+
if self._needs_extension and self._extension_mapping is not None:
|
|
403
|
+
n_samples = preds.shape[0]
|
|
404
|
+
n_eval_labels = len(self.evaluation_labels)
|
|
405
|
+
extended = np.full((n_samples, n_eval_labels), 1e-5, dtype=np.float64)
|
|
406
|
+
for train_idx, eval_idx in self._extension_mapping.items():
|
|
407
|
+
extended[:, eval_idx] = preds[:, train_idx]
|
|
408
|
+
row_sums = extended.sum(axis=1, keepdims=True)
|
|
409
|
+
return extended / row_sums
|
|
373
410
|
|
|
374
|
-
|
|
375
|
-
if self._eval_indices is None:
|
|
376
|
-
return preds
|
|
377
|
-
sliced = preds[:, self._eval_indices]
|
|
378
|
-
row_sums = sliced.sum(axis=1, keepdims=True)
|
|
379
|
-
row_sums = np.where(row_sums == 0, 1.0, row_sums)
|
|
380
|
-
return sliced / row_sums
|
|
411
|
+
return preds
|
|
381
412
|
|
|
382
413
|
def _get_scoring_labels(self) -> list[int]:
|
|
383
414
|
if self.evaluation_labels is not None:
|
|
@@ -446,7 +477,7 @@ class PWMSE(BaseScorer):
|
|
|
446
477
|
|
|
447
478
|
targets = gran_df[self.target].to_numpy().astype(np.float64)
|
|
448
479
|
preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
|
|
449
|
-
preds = self.
|
|
480
|
+
preds = self._align_predictions(preds)
|
|
450
481
|
score = self._pwmse_score(targets, preds)
|
|
451
482
|
if self.compare_to_naive:
|
|
452
483
|
naive_probs_list = _naive_probability_predictions_for_df(
|
|
@@ -464,7 +495,7 @@ class PWMSE(BaseScorer):
|
|
|
464
495
|
|
|
465
496
|
targets = df[self.target].to_numpy().astype(np.float64)
|
|
466
497
|
preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
|
|
467
|
-
preds = self.
|
|
498
|
+
preds = self._align_predictions(preds)
|
|
468
499
|
score = self._pwmse_score(targets, preds)
|
|
469
500
|
if self.compare_to_naive:
|
|
470
501
|
naive_probs_list = _naive_probability_predictions_for_df(
|
|
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
11
|
-
def __init__(self, granularity: list[str]):
|
|
11
|
+
def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
|
|
12
12
|
self.granularity = granularity
|
|
13
|
+
self.aggregation_weight = aggregation_weight
|
|
13
14
|
|
|
14
15
|
@nw.narwhalify
|
|
15
16
|
def fit(self, X: IntoFrameT, y: Any = None):
|
|
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
26
27
|
raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
|
|
27
28
|
|
|
28
29
|
non_keys = [c for c in df.columns if c not in keys]
|
|
29
|
-
|
|
30
|
+
schema = df.schema
|
|
31
|
+
num_cols = [c for c in non_keys if schema[c].is_numeric()]
|
|
30
32
|
other_cols = [c for c in non_keys if c not in num_cols]
|
|
31
33
|
|
|
32
34
|
aggs: list[nw.Expr] = []
|
|
33
35
|
|
|
36
|
+
# Backwards compatibility: old pickled objects may not have aggregation_weight
|
|
37
|
+
weight_col = getattr(self, "aggregation_weight", None)
|
|
38
|
+
has_weight = weight_col and weight_col in df.columns
|
|
39
|
+
|
|
34
40
|
for c in num_cols:
|
|
35
|
-
|
|
41
|
+
if c == weight_col:
|
|
42
|
+
aggs.append(nw.col(c).sum().alias(c))
|
|
43
|
+
elif has_weight:
|
|
44
|
+
aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
|
|
45
|
+
aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
|
|
46
|
+
else:
|
|
47
|
+
aggs.append(nw.col(c).mean().alias(c))
|
|
36
48
|
|
|
37
49
|
for c in other_cols:
|
|
38
50
|
aggs.append(nw.col(c).first().alias(c))
|
|
39
51
|
|
|
52
|
+
if has_weight:
|
|
53
|
+
aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
|
|
54
|
+
|
|
40
55
|
out = df.group_by(keys).agg(aggs)
|
|
56
|
+
|
|
57
|
+
if has_weight:
|
|
58
|
+
weighted_cols = [c for c in num_cols if c != weight_col]
|
|
59
|
+
for c in weighted_cols:
|
|
60
|
+
out = out.with_columns(
|
|
61
|
+
nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
|
|
62
|
+
.then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
|
|
63
|
+
.otherwise(nw.col(f"__{c}_fallback"))
|
|
64
|
+
.alias(c)
|
|
65
|
+
)
|
|
66
|
+
drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
|
|
67
|
+
drop_cols += [f"__{c}_fallback" for c in weighted_cols]
|
|
68
|
+
drop_cols.append("__weight_sum")
|
|
69
|
+
out = out.drop(drop_cols)
|
|
70
|
+
|
|
41
71
|
return out
|
|
42
72
|
|
|
43
73
|
@nw.narwhalify
|
|
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
59
89
|
if sample_weight is not None:
|
|
60
90
|
df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
|
|
61
91
|
|
|
62
|
-
|
|
92
|
+
y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
|
|
93
|
+
non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
|
|
94
|
+
if len(non_uniform) > 0:
|
|
95
|
+
raise ValueError("Target (y) must be uniform within each granularity group")
|
|
63
96
|
|
|
64
|
-
|
|
65
|
-
agg_exprs = [nw.col("__y").mean().alias("__y")]
|
|
66
|
-
else:
|
|
67
|
-
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
97
|
+
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
68
98
|
|
|
69
99
|
if sample_weight is not None:
|
|
70
100
|
agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.18
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -14,7 +14,7 @@ examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
14
14
|
examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
|
|
15
15
|
examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
|
|
16
16
|
spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
|
|
17
|
-
spforge/autopipeline.py,sha256=
|
|
17
|
+
spforge/autopipeline.py,sha256=rZ6FhJxcgNLvtr3hTVkEiW4BiorgXxADThfMuQ42orE,29866
|
|
18
18
|
spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
|
|
19
19
|
spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
|
|
20
20
|
spforge/features_generator_pipeline.py,sha256=n8vzZKqXNFcFRDWZhllnkhAh5NFXdOD3FEIOpHcay8E,8208
|
|
@@ -30,7 +30,7 @@ spforge/estimator/__init__.py,sha256=zIJ4u7WGPOALPx8kVBppBOqklI4lQPl9QBWT8JjjFoY
|
|
|
30
30
|
spforge/estimator/_conditional_estimator.py,sha256=JSHpOg5lv3kRv_VzSZ0fKbwCO2dJv9XpyLs9lS81psU,4904
|
|
31
31
|
spforge/estimator/_frequency_bucketing_classifier.py,sha256=d7wDpOCoKWf-WoXtzwahjtmAozkFdKE3-pzs477WMYc,6055
|
|
32
32
|
spforge/estimator/_granularity_estimator.py,sha256=pUNmtpDFoOVbS9mHfO-zvidPIKJgWts0y2VnhJ8VWww,3829
|
|
33
|
-
spforge/estimator/_group_by_estimator.py,sha256=
|
|
33
|
+
spforge/estimator/_group_by_estimator.py,sha256=o-xv_PJJyWBaKv5Eo4EPbOvb9i0CuebZnX4GtEFp_Js,3120
|
|
34
34
|
spforge/estimator/_ordinal_classifier.py,sha256=j_dfVHeX-6eZgPwwsYbkbP6bPrKH2a5S-N8vfP5hneA,1993
|
|
35
35
|
spforge/estimator/_sklearn_enhancer_estimator.py,sha256=DZ-UlmeazXPd6uEnlbVv79syZ5FPa64voUyKArtjjUs,4664
|
|
36
36
|
spforge/feature_generator/__init__.py,sha256=wfLfUkC_lLOCpy7NgDytK-l3HUAuhikuQXdKCgSGbuA,556
|
|
@@ -47,32 +47,32 @@ spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8
|
|
|
47
47
|
spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
|
|
48
48
|
spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
|
|
49
49
|
spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
|
|
50
|
-
spforge/performance_transformers/_performance_manager.py,sha256=
|
|
50
|
+
spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI_s9Y-9eMXIyhPTUKrwsXRtgYp0k,9620
|
|
51
51
|
spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
|
|
52
52
|
spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
|
|
53
53
|
spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
|
|
54
|
-
spforge/ratings/_player_rating.py,sha256=
|
|
54
|
+
spforge/ratings/_player_rating.py,sha256=JSTXdaRw_b8ZoZxgmMnZrYG7gPg8GKawqalLd16SK1M,56066
|
|
55
55
|
spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
|
|
56
56
|
spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
|
|
57
57
|
spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
|
|
58
58
|
spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
|
|
59
59
|
spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
|
|
60
|
-
spforge/ratings/start_rating_generator.py,sha256=
|
|
60
|
+
spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
|
|
61
61
|
spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
|
|
62
|
-
spforge/ratings/team_start_rating_generator.py,sha256=
|
|
63
|
-
spforge/ratings/utils.py,sha256=
|
|
62
|
+
spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
|
|
63
|
+
spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
|
|
64
64
|
spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
|
|
65
|
-
spforge/scorer/_score.py,sha256=
|
|
65
|
+
spforge/scorer/_score.py,sha256=kNuqiK3F5mUEAVD7KjWYY7E_AkRrspR362QBm_jyElg,57623
|
|
66
66
|
spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
|
|
67
67
|
spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
|
|
68
68
|
spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
|
|
69
69
|
spforge/transformers/_operator.py,sha256=jOH7wdMBLg6R2hlH_FU6eA0gjs-Q0vFimTo7fXgKpjI,2964
|
|
70
|
-
spforge/transformers/_other_transformer.py,sha256=
|
|
70
|
+
spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo6nE_9-3M10owA,4646
|
|
71
71
|
spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
|
|
72
72
|
spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
|
|
73
73
|
spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
|
|
74
|
-
spforge-0.8.
|
|
75
|
-
tests/test_autopipeline.py,sha256=
|
|
74
|
+
spforge-0.8.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
75
|
+
tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
|
|
76
76
|
tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
|
|
77
77
|
tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
|
|
78
78
|
tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
|
|
@@ -92,12 +92,14 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
|
|
|
92
92
|
tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
|
|
93
93
|
tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
|
|
94
94
|
tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
|
|
95
|
-
tests/performance_transformers/test_performance_manager.py,sha256=
|
|
95
|
+
tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
|
|
96
96
|
tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
|
|
97
|
-
tests/ratings/test_player_rating_generator.py,sha256=
|
|
97
|
+
tests/ratings/test_player_rating_generator.py,sha256=SKLaBQBsHYslc2Nia2AxZ8A9Cy16MbZAWjLyOjvcMnA,64094
|
|
98
|
+
tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
|
|
98
99
|
tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
|
|
99
100
|
tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
|
|
100
|
-
tests/
|
|
101
|
+
tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
|
|
102
|
+
tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
|
|
101
103
|
tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
|
|
102
104
|
tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
|
|
103
105
|
tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
|
|
@@ -105,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
105
107
|
tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
|
|
106
108
|
tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
|
|
107
109
|
tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
|
|
108
|
-
spforge-0.8.
|
|
109
|
-
spforge-0.8.
|
|
110
|
-
spforge-0.8.
|
|
111
|
-
spforge-0.8.
|
|
110
|
+
spforge-0.8.18.dist-info/METADATA,sha256=54l0UTrew2ot0_4k22hLKL-oXbQ4hlA1_KAXIqf_umw,20048
|
|
111
|
+
spforge-0.8.18.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
112
|
+
spforge-0.8.18.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
113
|
+
spforge-0.8.18.dist-info/RECORD,,
|
|
@@ -56,6 +56,21 @@ def test_performance_weights_manager_basic_flow(sample_data):
|
|
|
56
56
|
assert output_df["weighted_performance"].iloc[0] == pytest.approx(0.6)
|
|
57
57
|
|
|
58
58
|
|
|
59
|
+
def test_performance_weights_manager_keeps_mean_when_weights_not_normalized():
|
|
60
|
+
df = pd.DataFrame(
|
|
61
|
+
{
|
|
62
|
+
"feat_a": [0.0, 1.0, 2.0, 3.0],
|
|
63
|
+
"feat_b": [3.0, 2.0, 1.0, 0.0],
|
|
64
|
+
}
|
|
65
|
+
)
|
|
66
|
+
weights = [ColumnWeight(name="feat_a", weight=0.9), ColumnWeight(name="feat_b", weight=0.5)]
|
|
67
|
+
|
|
68
|
+
manager = PerformanceWeightsManager(weights=weights, transformer_names=["min_max"], prefix="")
|
|
69
|
+
output_df = nw.from_native(manager.fit_transform(df)).to_pandas()
|
|
70
|
+
|
|
71
|
+
assert output_df["weighted_performance"].mean() == pytest.approx(0.5, abs=1e-6)
|
|
72
|
+
|
|
73
|
+
|
|
59
74
|
def test_lower_is_better_logic():
|
|
60
75
|
df = pd.DataFrame({"feat_a": [1.0, 0.0]})
|
|
61
76
|
weights = [ColumnWeight(name="feat_a", weight=1.0, lower_is_better=True)]
|
|
@@ -551,6 +551,63 @@ def test_fit_transform_scales_participation_weight_by_fit_quantile(base_cn):
|
|
|
551
551
|
assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
|
|
552
552
|
|
|
553
553
|
|
|
554
|
+
def test_fit_transform_auto_scales_participation_weight_when_out_of_bounds(base_cn):
|
|
555
|
+
"""Automatically enable scaling when participation weights exceed [0, 1]."""
|
|
556
|
+
df = pl.DataFrame(
|
|
557
|
+
{
|
|
558
|
+
"pid": ["P1", "P2", "O1", "O2"],
|
|
559
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
560
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
561
|
+
"dt": ["2024-01-01"] * 4,
|
|
562
|
+
"perf": [0.9, 0.9, 0.1, 0.1],
|
|
563
|
+
"pw": [10.0, 20.0, 10.0, 10.0],
|
|
564
|
+
}
|
|
565
|
+
)
|
|
566
|
+
gen = PlayerRatingGenerator(
|
|
567
|
+
performance_column="perf",
|
|
568
|
+
column_names=base_cn,
|
|
569
|
+
auto_scale_performance=True,
|
|
570
|
+
start_harcoded_start_rating=1000.0,
|
|
571
|
+
)
|
|
572
|
+
gen.fit_transform(df)
|
|
573
|
+
|
|
574
|
+
start_rating = 1000.0
|
|
575
|
+
p1_change = gen._player_off_ratings["P1"].rating_value - start_rating
|
|
576
|
+
p2_change = gen._player_off_ratings["P2"].rating_value - start_rating
|
|
577
|
+
|
|
578
|
+
q = df["pw"].quantile(0.99, "linear")
|
|
579
|
+
expected_ratio = min(1.0, 10.0 / q) / min(1.0, 20.0 / q)
|
|
580
|
+
|
|
581
|
+
assert gen.scale_participation_weights is True
|
|
582
|
+
assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def test_fit_transform_auto_scale_logs_warning_when_out_of_bounds(base_cn, caplog):
|
|
586
|
+
"""Auto-scaling should emit a warning when participation weights exceed [0, 1]."""
|
|
587
|
+
df = pl.DataFrame(
|
|
588
|
+
{
|
|
589
|
+
"pid": ["P1", "P2", "O1", "O2"],
|
|
590
|
+
"tid": ["T1", "T1", "T2", "T2"],
|
|
591
|
+
"mid": ["M1", "M1", "M1", "M1"],
|
|
592
|
+
"dt": ["2024-01-01"] * 4,
|
|
593
|
+
"perf": [0.9, 0.9, 0.1, 0.1],
|
|
594
|
+
"pw": [10.0, 20.0, 10.0, 10.0],
|
|
595
|
+
}
|
|
596
|
+
)
|
|
597
|
+
gen = PlayerRatingGenerator(
|
|
598
|
+
performance_column="perf",
|
|
599
|
+
column_names=base_cn,
|
|
600
|
+
auto_scale_performance=True,
|
|
601
|
+
start_harcoded_start_rating=1000.0,
|
|
602
|
+
)
|
|
603
|
+
with caplog.at_level("WARNING"):
|
|
604
|
+
gen.fit_transform(df)
|
|
605
|
+
|
|
606
|
+
assert any(
|
|
607
|
+
"Auto-scaling participation weights" in record.message for record in caplog.records
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
|
|
554
611
|
def test_future_transform_scales_projected_participation_weight_by_fit_quantile():
|
|
555
612
|
"""Future projected participation weights should scale with fit quantile and be clipped."""
|
|
556
613
|
cn = ColumnNames(
|
|
@@ -1689,3 +1746,73 @@ def test_fit_transform__player_rating_difference_from_team_projected_feature(bas
|
|
|
1689
1746
|
for row in result.iter_rows(named=True):
|
|
1690
1747
|
expected = row[player_col] - row[team_col]
|
|
1691
1748
|
assert row[diff_col] == pytest.approx(expected, rel=1e-9)
|
|
1749
|
+
|
|
1750
|
+
|
|
1751
|
+
def test_fit_transform__start_league_quantile_uses_existing_player_ratings(base_cn):
|
|
1752
|
+
"""
|
|
1753
|
+
Bug reproduction: start_league_quantile should use percentile of existing player
|
|
1754
|
+
ratings for new players, but update_players_to_leagues is never called so
|
|
1755
|
+
_league_player_ratings stays empty and all new players get default rating.
|
|
1756
|
+
|
|
1757
|
+
Expected: New player P_NEW should start at 5th percentile of existing ratings (~920)
|
|
1758
|
+
Actual: New player starts at default 1000 because _league_player_ratings is empty
|
|
1759
|
+
"""
|
|
1760
|
+
import numpy as np
|
|
1761
|
+
|
|
1762
|
+
num_existing_players = 60
|
|
1763
|
+
player_ids = [f"P{i}" for i in range(num_existing_players)]
|
|
1764
|
+
team_ids = [f"T{i % 2 + 1}" for i in range(num_existing_players)]
|
|
1765
|
+
|
|
1766
|
+
df1 = pl.DataFrame(
|
|
1767
|
+
{
|
|
1768
|
+
"pid": player_ids,
|
|
1769
|
+
"tid": team_ids,
|
|
1770
|
+
"mid": ["M1"] * num_existing_players,
|
|
1771
|
+
"dt": ["2024-01-01"] * num_existing_players,
|
|
1772
|
+
"perf": [0.3 + (i % 10) * 0.07 for i in range(num_existing_players)],
|
|
1773
|
+
"pw": [1.0] * num_existing_players,
|
|
1774
|
+
}
|
|
1775
|
+
)
|
|
1776
|
+
|
|
1777
|
+
gen = PlayerRatingGenerator(
|
|
1778
|
+
performance_column="perf",
|
|
1779
|
+
column_names=base_cn,
|
|
1780
|
+
auto_scale_performance=True,
|
|
1781
|
+
start_league_quantile=0.05,
|
|
1782
|
+
start_min_count_for_percentiles=50,
|
|
1783
|
+
features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
|
|
1784
|
+
)
|
|
1785
|
+
gen.fit_transform(df1)
|
|
1786
|
+
|
|
1787
|
+
existing_ratings = [
|
|
1788
|
+
gen._player_off_ratings[pid].rating_value for pid in player_ids
|
|
1789
|
+
]
|
|
1790
|
+
expected_quantile_rating = np.percentile(existing_ratings, 5)
|
|
1791
|
+
|
|
1792
|
+
srg = gen.start_rating_generator
|
|
1793
|
+
assert len(srg._league_player_ratings.get(None, [])) >= 50, (
|
|
1794
|
+
f"Expected _league_player_ratings to have >=50 entries but got "
|
|
1795
|
+
f"{len(srg._league_player_ratings.get(None, []))}. "
|
|
1796
|
+
"update_players_to_leagues is never called."
|
|
1797
|
+
)
|
|
1798
|
+
|
|
1799
|
+
df2 = pl.DataFrame(
|
|
1800
|
+
{
|
|
1801
|
+
"pid": ["P_NEW", "P0"],
|
|
1802
|
+
"tid": ["T1", "T2"],
|
|
1803
|
+
"mid": ["M2", "M2"],
|
|
1804
|
+
"dt": ["2024-01-02", "2024-01-02"],
|
|
1805
|
+
"pw": [1.0, 1.0],
|
|
1806
|
+
}
|
|
1807
|
+
)
|
|
1808
|
+
result = gen.future_transform(df2)
|
|
1809
|
+
|
|
1810
|
+
new_player_start_rating = result.filter(pl.col("pid") == "P_NEW")[
|
|
1811
|
+
"player_off_rating_perf"
|
|
1812
|
+
][0]
|
|
1813
|
+
|
|
1814
|
+
assert new_player_start_rating == pytest.approx(expected_quantile_rating, rel=0.1), (
|
|
1815
|
+
f"New player should start at 5th percentile ({expected_quantile_rating:.1f}) "
|
|
1816
|
+
f"but got {new_player_start_rating:.1f}. "
|
|
1817
|
+
"start_league_quantile has no effect because update_players_to_leagues is never called."
|
|
1818
|
+
)
|