spforge 0.8.33__py3-none-any.whl → 0.8.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spforge/performance_transformers/__init__.py +1 -0
- spforge/performance_transformers/_performance_manager.py +46 -0
- spforge/performance_transformers/_performances_transformers.py +80 -0
- spforge/ratings/_player_rating.py +15 -0
- spforge/ratings/enums.py +1 -0
- spforge/ratings/utils.py +13 -0
- {spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/METADATA +1 -1
- {spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/RECORD +14 -14
- tests/performance_transformers/test_performance_manager.py +129 -1
- tests/performance_transformers/test_performances_transformers.py +196 -0
- tests/ratings/test_player_rating_generator.py +32 -3
- {spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/WHEEL +0 -0
- {spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/top_level.txt +0 -0
spforge/performance_transformers/__init__.py CHANGED
@@ -6,5 +6,6 @@ from ._performances_transformers import (
     DiminishingValueTransformer as DiminishingValueTransformer,
     MinMaxTransformer as MinMaxTransformer,
     PartialStandardScaler as PartialStandardScaler,
+    QuantilePerformanceScaler as QuantilePerformanceScaler,
     SymmetricDistributionTransformer as SymmetricDistributionTransformer,
 )
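With this re-export in place, the new scaler is importable from the subpackage alongside the existing transformers; a minimal sketch (the constructor call mirrors the updated tests):

from spforge.performance_transformers import QuantilePerformanceScaler

scaler = QuantilePerformanceScaler(features=["performance"], prefix="")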
spforge/performance_transformers/_performance_manager.py CHANGED
@@ -4,6 +4,7 @@ from dataclasses import dataclass
 from typing import Literal
 
 import narwhals.stable.v2 as nw
+import numpy as np
 from narwhals.typing import IntoFrameT
 from sklearn.base import BaseEstimator, TransformerMixin
 
@@ -11,6 +12,7 @@ from spforge.performance_transformers._performances_transformers import (
     MinMaxTransformer,
     NarwhalsFeatureTransformer,
     PartialStandardScaler,
+    QuantilePerformanceScaler,
     SymmetricDistributionTransformer,
 )
 
@@ -86,9 +88,12 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
         prefix: str = "performance__",
         min_value: float = -0.02,
         max_value: float = 1.02,
+        zero_inflation_threshold: float = 0.15,
     ):
         self.features = features
         self.prefix = prefix
+        # Store whether user explicitly disabled transformers (passed empty list)
+        self._user_disabled_transformers = transformer_names is not None and len(transformer_names) == 0
         self.transformer_names = transformer_names or [
             "symmetric",
             "partial_standard_scaler",
@@ -100,6 +105,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
         self.performance_column = self.prefix + performance_column
         self.min_value = min_value
         self.max_value = max_value
+        self.zero_inflation_threshold = zero_inflation_threshold
 
         self.transformers = create_performance_scalers_transformers(
             transformer_names=self.transformer_names,
@@ -107,9 +113,47 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
             features=self.features,
             prefix=self.prefix,
         )
+        self._using_quantile_scaler = False
 
     @nw.narwhalify
     def fit(self, df: IntoFrameT, y=None):
+        # Check for zero-inflated distributions and swap to quantile scaler if needed
+        # Only apply when user hasn't explicitly disabled transformers (passed empty list)
+        if self.zero_inflation_threshold > 0 and not self._user_disabled_transformers:
+            df = self._ensure_inputs_exist(df, self.transformers[0])
+            prefixed_features = [self.prefix + f for f in self.features]
+
+            for feature in prefixed_features:
+                if feature in df.columns:
+                    values = df[feature].to_numpy()
+                    values = values[np.isfinite(values)]
+
+                    # Skip if binary/categorical data (few unique values)
+                    # Quantile scaler is for continuous zero-inflated data, not binary outcomes
+                    n_unique = len(np.unique(values))
+                    if n_unique <= 3:
+                        continue
+
+                    zero_proportion = np.mean(np.abs(values) < 1e-10)
+
+                    if zero_proportion > self.zero_inflation_threshold:
+                        logging.info(
+                            f"Detected zero-inflated distribution for {feature} "
+                            f"({zero_proportion:.1%} zeros). Using QuantilePerformanceScaler."
+                        )
+                        self._using_quantile_scaler = True
+                        # Use original_transformers (deepcopy made before standard transformers
+                        # were appended to custom_transformers)
+                        self.transformers = [
+                            copy.deepcopy(t) for t in self.original_transformers
+                        ] + [
+                            QuantilePerformanceScaler(
+                                features=prefixed_features,
+                                prefix="",
+                            )
+                        ]
+                        break
+
         for t in self.transformers:
             df = self._ensure_inputs_exist(df, t)
             t.fit(df)
@@ -169,6 +213,7 @@ class PerformanceWeightsManager(PerformanceManager):
         min_value: float = -0.02,
         prefix: str = "performance__",
         return_all_features: bool = False,
+        zero_inflation_threshold: float = 0.15,
     ):
         self.weights = weights
         self.return_all_features = return_all_features
@@ -181,6 +226,7 @@ class PerformanceWeightsManager(PerformanceManager):
             max_value=max_value,
             min_value=min_value,
             performance_column=performance_column,
+            zero_inflation_threshold=zero_inflation_threshold,
         )
 
     @nw.narwhalify
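For orientation, a minimal standalone sketch of the detection rule the new fit() code applies, using illustrative numbers (not data from the package): a feature triggers the swap when it has more than three unique finite values and its share of (near-)zero values exceeds zero_inflation_threshold.

import numpy as np

zero_inflation_threshold = 0.15  # default added in this release

values = np.array([0.0, 0.0, 0.0, 1.2, 0.4, 2.7, 0.0, 3.1])  # illustrative data
values = values[np.isfinite(values)]

is_continuous = len(np.unique(values)) > 3          # skip binary/categorical features
zero_proportion = np.mean(np.abs(values) < 1e-10)   # share of (near-)zero values

swap_to_quantile_scaler = is_continuous and zero_proportion > zero_inflation_threshold
print(zero_proportion, swap_to_quantile_scaler)     # 0.5 True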
spforge/performance_transformers/_performances_transformers.py CHANGED
@@ -3,6 +3,7 @@ from typing import Literal, Protocol
 
 import narwhals
 import narwhals.stable.v2 as nw
+import numpy as np
 from lightgbm import LGBMRegressor
 from narwhals.typing import IntoFrameT
 from sklearn.base import BaseEstimator, TransformerMixin
@@ -420,3 +421,82 @@ class GroupByTransformer(BaseEstimator, TransformerMixin):
     @nw.narwhalify
     def transform(self, df: IntoFrameT) -> IntoFrameT:
         return df.join(self._grouped, on=self.granularity, how="left").to_native()
+
+
+class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
+    """
+    Quantile-based scaling for zero-inflated distributions.
+
+    Uses probability integral transform:
+    - Zeros → π/2 (midpoint of zero probability mass)
+    - Non-zeros → uniform on (π, 1) via empirical CDF
+
+    Fast: O(n log n) for fit, O(n) for transform.
+    """
+
+    def __init__(
+        self,
+        features: list[str],
+        zero_threshold: float = 1e-10,
+        n_quantiles: int = 1000,
+        prefix: str = "",
+    ):
+        self.features = features
+        self.zero_threshold = zero_threshold
+        self.n_quantiles = n_quantiles
+        self.prefix = prefix
+        self.features_out = [self.prefix + f for f in self.features]
+
+        self._zero_proportion: dict[str, float] = {}
+        self._nonzero_quantiles: dict[str, np.ndarray | None] = {}
+
+    @nw.narwhalify
+    def fit(self, df: IntoFrameT, y=None):
+        for feature in self.features:
+            values = df[feature].to_numpy()
+            values = values[np.isfinite(values)]
+
+            is_zero = np.abs(values) < self.zero_threshold
+            self._zero_proportion[feature] = np.mean(is_zero)
+
+            nonzero_values = values[~is_zero]
+            if len(nonzero_values) > 0:
+                percentiles = np.linspace(0, 100, self.n_quantiles + 1)
+                self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
+            else:
+                self._nonzero_quantiles[feature] = None
+        return self
+
+    @nw.narwhalify
+    def transform(self, df: IntoFrameT) -> IntoFrameT:
+        for feature in self.features:
+            out_feature = self.prefix + feature
+            values = df[feature].to_numpy()
+            result = np.full_like(values, np.nan, dtype=float)
+
+            # Handle NaN explicitly - preserve NaN in output
+            is_finite = np.isfinite(values)
+            is_zero = is_finite & (np.abs(values) < self.zero_threshold)
+            is_nonzero = is_finite & ~is_zero
+
+            pi = self._zero_proportion[feature]
+
+            # Zeros → midpoint of zero mass
+            result[is_zero] = pi / 2
+
+            # Non-zeros → interpolate to (π, 1)
+            nonzero_quantiles = self._nonzero_quantiles[feature]
+            if nonzero_quantiles is not None and np.any(is_nonzero):
+                nonzero_values = np.clip(
+                    values[is_nonzero], nonzero_quantiles[0], nonzero_quantiles[-1]
+                )
+                ranks = np.interp(
+                    nonzero_values,
+                    nonzero_quantiles,
+                    np.linspace(0, 1, len(nonzero_quantiles)),
+                )
+                result[is_nonzero] = pi + (1 - pi) * ranks
+
+            df = df.with_columns(**{out_feature: result})
+
+        return df.to_native()
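To make the docstring concrete, a small example of the mapping on made-up data (the class is the one added above; the input numbers are illustrative, not from the package's tests). With 4 zeros out of 10 values the zero proportion is π = 0.4, so every zero maps to π/2 = 0.2 and the non-zeros are spread over [0.4, 1.0] by their empirical quantile.

import numpy as np
import pandas as pd

from spforge.performance_transformers import QuantilePerformanceScaler

# 4 zeros out of 10 values -> pi = 0.4
df = pd.DataFrame({"performance": [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]})

scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
out = scaler.fit_transform(df)

pi = scaler._zero_proportion["performance"]  # 0.4
print(out["performance"].to_numpy())
# zeros -> pi / 2 = 0.2; non-zeros -> pi + (1 - pi) * rank, spread over [0.4, 1.0]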
spforge/ratings/_player_rating.py CHANGED
@@ -29,6 +29,7 @@ from spforge.ratings._base import RatingGenerator, RatingKnownFeatures, RatingUn
 from spforge.ratings.start_rating_generator import StartRatingGenerator
 from spforge.ratings.utils import (
     add_opp_team_rating,
+    add_player_opponent_mean_projected,
     add_rating_difference_projected,
     add_rating_mean_projected,
     add_team_rating,
@@ -141,6 +142,9 @@ class PlayerRatingGenerator(RatingGenerator):
         self.PLAYER_DIFF_FROM_TEAM_PROJ_COL = self._suffix(
             str(RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED)
         )
+        self.PLAYER_OPP_MEAN_PROJ_COL = self._suffix(
+            str(RatingKnownFeatures.PLAYER_OPPONENT_MEAN_PROJECTED)
+        )
 
         self.TEAM_OFF_RATING_PROJ_COL = self._suffix(
             str(RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED)
@@ -844,6 +848,7 @@ class PlayerRatingGenerator(RatingGenerator):
             or self.OPP_RATING_PROJ_COL in cols_to_add
             or self.DIFF_PROJ_COL in cols_to_add
             or self.PLAYER_DIFF_PROJ_COL in cols_to_add
+            or self.PLAYER_OPP_MEAN_PROJ_COL in cols_to_add
         ):
             df = add_team_rating_projected(
                 df=df,
@@ -865,6 +870,7 @@ class PlayerRatingGenerator(RatingGenerator):
             or self.OPP_RATING_PROJ_COL in cols_to_add
             or self.DIFF_PROJ_COL in cols_to_add
             or self.PLAYER_DIFF_PROJ_COL in cols_to_add
+            or self.PLAYER_OPP_MEAN_PROJ_COL in cols_to_add
         ):
             df = add_opp_team_rating(
                 df=df,
@@ -925,6 +931,15 @@ class PlayerRatingGenerator(RatingGenerator):
                 rating_mean_out=self.MEAN_PROJ_COL,
             )
 
+        if self.PLAYER_OPP_MEAN_PROJ_COL in cols_to_add:
+            df = add_player_opponent_mean_projected(
+                df=df,
+                column_names=cn,
+                player_rating_col=self.PLAYER_RATING_COL,
+                opp_team_rating_col=self.OPP_RATING_PROJ_COL,
+                out_col=self.PLAYER_OPP_MEAN_PROJ_COL,
+            )
+
         if self.DIFF_COL in cols_to_add and self.DIFF_COL not in df.columns:
             if self.TEAM_RATING_COL not in df.columns:
                 df = add_team_rating(
spforge/ratings/enums.py CHANGED
@@ -17,6 +17,7 @@ class RatingKnownFeatures(StrEnum):
     PLAYER_RATING_DIFFERENCE_PROJECTED = "player_rating_difference_projected"
     TEAM_RATING_DIFFERENCE_PROJECTED = "team_rating_difference_projected"
     RATING_MEAN_PROJECTED = "rating_mean_projected"
+    PLAYER_OPPONENT_MEAN_PROJECTED = "player_opponent_mean_projected"
     TEAM_LEAGUE = "team_league"
     PLAYER_LEAGUE = "player_league"
     OPPONENT_LEAGUE = "opponent_league"
spforge/ratings/utils.py CHANGED
@@ -137,3 +137,16 @@ def add_rating_mean_projected(
     )
 
     return df.with_columns(pl.col(player_rating_col).mean().over(mid).alias(rating_mean_out))
+
+
+def add_player_opponent_mean_projected(
+    df: pl.DataFrame,
+    column_names: ColumnNames,
+    player_rating_col: str,
+    opp_team_rating_col: str,
+    out_col: str,
+) -> pl.DataFrame:
+    """Mean of player rating and opponent team rating."""
+    return df.with_columns(
+        ((pl.col(player_rating_col) + pl.col(opp_team_rating_col)) / 2).alias(out_col)
+    )
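The helper is a plain Polars expression; a small illustrative sketch of the same computation on made-up column names (the real column names are derived inside PlayerRatingGenerator via self._suffix):

import polars as pl

df = pl.DataFrame({"player_rating": [1000.0, 1100.0], "opp_rating_proj": [900.0, 1300.0]})

out = df.with_columns(
    ((pl.col("player_rating") + pl.col("opp_rating_proj")) / 2).alias("player_opponent_mean_projected")
)
print(out["player_opponent_mean_projected"].to_list())  # [950.0, 1200.0]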
{spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.33
+Version: 0.8.35
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
{spforge-0.8.33.dist-info → spforge-0.8.35.dist-info}/RECORD CHANGED
@@ -46,21 +46,21 @@ spforge/feature_generator/_utils.py,sha256=KDn33ia1OYJTK8THFpvc_uRiH_Bl3fImGqqbf
 spforge/hyperparameter_tuning/__init__.py,sha256=Vcl8rVlJ7M708iPgqe4XxpZWgJKGux0Y5HgMCymRsHg,1099
 spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=SjwXLpvYIu_JY8uPRHeL5Kgp1aa0slWDz8qsKDaohWQ,8020
 spforge/hyperparameter_tuning/_tuner.py,sha256=M79q3saM6r0UZJsRUUgfdDr-3Qii-F2-wuSAZLFtZDo,19246
-spforge/performance_transformers/__init__.py,sha256=
-spforge/performance_transformers/_performance_manager.py,sha256=
-spforge/performance_transformers/_performances_transformers.py,sha256=
+spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAbuKK9M9xHq7IGlQU,482
+spforge/performance_transformers/_performance_manager.py,sha256=tR_4laGoC_KFRaw3Gy0TMI-r5gnicDmvmxPEgAvl4E0,12031
+spforge/performance_transformers/_performances_transformers.py,sha256=ZjkFDXoEe5fURpN-dNkrgFXpHEg4aFCWdBDnPyLtgkM,18368
 spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
 spforge/ratings/_base.py,sha256=ne4BRrYFPqMirdFPVnyDN44wjFQwOQgWoUXu_59xgWE,14687
-spforge/ratings/_player_rating.py,sha256=
+spforge/ratings/_player_rating.py,sha256=KkTmKtacx-1cMuncWVIkoO-3srfEOAjx8o5prEeaAWQ,67811
 spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
-spforge/ratings/enums.py,sha256=
+spforge/ratings/enums.py,sha256=maG0X4WMQeMVAc2wbceq1an-U-z8moZGeG2BAgfICDA,1809
 spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
 spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
 spforge/ratings/player_performance_predictor.py,sha256=GtPpYlALgbQk8YHeaiRbpRvJHxeAhKpRxsaVUc9zR5o,7963
 spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
 spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
 spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
-spforge/ratings/utils.py,sha256=
+spforge/ratings/utils.py,sha256=WFxpiutHG9itJtjtagb26mjpHRjIhT7hopsiyetUgTg,4866
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
 spforge/scorer/_score.py,sha256=DOl3wlHH0IlQelQA5CaNAfVtJhc544ZO5l-1mEno7nA,65276
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.
+spforge-0.8.35.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -92,9 +92,9 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
 tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
 tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
-tests/performance_transformers/test_performance_manager.py,sha256=
-tests/performance_transformers/test_performances_transformers.py,sha256=
-tests/ratings/test_player_rating_generator.py,sha256=
+tests/performance_transformers/test_performance_manager.py,sha256=fVXxSujE3OKE7tIRJjN5dWCLj9pkeXbuL6Zf0WrM0ZA,15698
+tests/performance_transformers/test_performances_transformers.py,sha256=HnW7GKQ6B0ova6Zy0lKbEpA6peZGFE4oi9Th6r7RnG0,18949
+tests/ratings/test_player_rating_generator.py,sha256=lFqFmEfy_sSyyeCmY0aCNaW3wj73ySVU3sp1O_m1os4,105713
 tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=SqQcfckNmJJc99feCdnmkNYDape-p69e92Dp8Vzpu2w,101156
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
+spforge-0.8.35.dist-info/METADATA,sha256=9ZQ0JmZkbQIGI48KUUlHw8jI8umvspKRztLv1E0EW60,20048
+spforge-0.8.35.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.35.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.35.dist-info/RECORD,,
tests/performance_transformers/test_performance_manager.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 import polars as pl
 import pytest
 
-from spforge.performance_transformers import PerformanceWeightsManager
+from spforge.performance_transformers import PerformanceWeightsManager, QuantilePerformanceScaler
 from spforge.performance_transformers._performance_manager import (
     ColumnWeight,
     PerformanceManager,
@@ -306,3 +306,131 @@ def test_factory_sets_transformer_features_to_prefixed_inputs_and_features_out_t
         if idx + 1 < len(ts):
             assert t.features_out == ts[idx + 1].features
         assert t.features == expected_in
+
+
+class TestZeroInflationHandling:
+    @pytest.fixture
+    def zero_inflated_data(self):
+        """Create zero-inflated data with ~37.7% zeros."""
+        np.random.seed(42)
+        n = 1000
+        zeros = np.zeros(377)
+        nonzeros = np.random.exponential(scale=2, size=n - 377)
+        raw = np.concatenate([zeros, nonzeros])
+        np.random.shuffle(raw)
+        return raw
+
+    @pytest.mark.parametrize("frame", ["pd", "pl"])
+    def test_performance_manager_detects_zero_inflation(self, frame, zero_inflated_data):
+        """Test that PerformanceManager auto-detects zero-inflated distributions."""
+        df = _make_native_df(frame, {"x": zero_inflated_data})
+
+        pm = PerformanceManager(
+            features=["x"],
+            transformer_names=["symmetric", "partial_standard_scaler", "min_max"],
+            prefix="performance__",
+            performance_column="perf",
+            zero_inflation_threshold=0.15,
+        )
+
+        pm.fit(df)
+
+        # Should have switched to quantile scaler
+        assert pm._using_quantile_scaler is True
+        assert isinstance(pm.transformers[-1], QuantilePerformanceScaler)
+
+    @pytest.mark.parametrize("frame", ["pd", "pl"])
+    def test_performance_manager_uses_standard_pipeline_for_normal_data(self, frame):
+        """Test that PerformanceManager uses standard pipeline for non-zero-inflated data."""
+        np.random.seed(42)
+        # Normal distribution - no zero inflation
+        data = np.random.normal(loc=0.5, scale=0.1, size=1000)
+        df = _make_native_df(frame, {"x": data})
+
+        pm = PerformanceManager(
+            features=["x"],
+            transformer_names=["symmetric", "partial_standard_scaler", "min_max"],
+            prefix="performance__",
+            performance_column="perf",
+            zero_inflation_threshold=0.15,
+        )
+
+        pm.fit(df)
+
+        # Should NOT have switched to quantile scaler
+        assert pm._using_quantile_scaler is False
+        assert isinstance(pm.transformers[-1], MinMaxTransformer)
+
+    @pytest.mark.parametrize("frame", ["pd", "pl"])
+    def test_zero_inflation_output_properties(self, frame, zero_inflated_data):
+        """Test that zero-inflated output has correct properties."""
+        df = _make_native_df(frame, {"x": zero_inflated_data})
+
+        pm = PerformanceManager(
+            features=["x"],
+            transformer_names=["symmetric", "partial_standard_scaler", "min_max"],
+            prefix="performance__",
+            performance_column="perf",
+            zero_inflation_threshold=0.15,
+        )
+
+        result = pm.fit_transform(df)
+        result_nw = nw.from_native(result)
+        scaled = result_nw["performance__perf"].to_numpy()
+
+        # 1. All zeros should have the same scaled value (the midpoint of zero mass)
+        is_zero = np.abs(zero_inflated_data) < 1e-10
+        zero_scaled_values = scaled[is_zero]
+        assert np.allclose(zero_scaled_values, zero_scaled_values[0], atol=1e-10)
+
+        # 2. Zeros should have lower values than non-zeros (on average)
+        is_nonzero = ~is_zero
+        assert np.mean(scaled[is_zero]) < np.mean(scaled[is_nonzero])
+
+        # 3. Mean should be approximately 0.5
+        assert abs(np.mean(scaled) - 0.5) < 0.02
+
+        # 4. Monotonicity preserved
+        order = np.argsort(zero_inflated_data)
+        sorted_scaled = scaled[order]
+        assert np.all(np.diff(sorted_scaled) >= -1e-10)
+
+        # 5. Bounded [0, 1] (with clipping tolerance)
+        assert np.all((scaled >= pm.min_value) & (scaled <= pm.max_value))
+
+    @pytest.mark.parametrize("frame", ["pd", "pl"])
+    def test_disable_zero_inflation_detection(self, frame, zero_inflated_data):
+        """Test that zero_inflation_threshold=0 disables detection."""
+        df = _make_native_df(frame, {"x": zero_inflated_data})
+
+        pm = PerformanceManager(
+            features=["x"],
+            transformer_names=["symmetric", "partial_standard_scaler", "min_max"],
+            prefix="performance__",
+            performance_column="perf",
+            zero_inflation_threshold=0,  # Disable detection
+        )
+
+        pm.fit(df)
+
+        # Should NOT have switched to quantile scaler
+        assert pm._using_quantile_scaler is False
+
+    @pytest.mark.parametrize("frame", ["pd", "pl"])
+    def test_performance_weights_manager_zero_inflation(self, frame, zero_inflated_data):
+        """Test that PerformanceWeightsManager also handles zero inflation."""
+        df = _make_native_df(frame, {"feat_a": zero_inflated_data})
+
+        weights = [ColumnWeight(name="feat_a", weight=1.0)]
+        manager = PerformanceWeightsManager(
+            weights=weights,
+            # Use default transformers (None) to enable zero inflation detection
+            transformer_names=None,
+            prefix="",
+            zero_inflation_threshold=0.15,
+        )
+
+        manager.fit(df)
+
+        # Should have switched to quantile scaler
+        assert manager._using_quantile_scaler is True
tests/performance_transformers/test_performances_transformers.py CHANGED
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 import polars as pl
 import pytest
@@ -6,6 +7,7 @@ from sklearn.linear_model import LinearRegression
 
 from spforge.performance_transformers import (
     DiminishingValueTransformer,
+    QuantilePerformanceScaler,
     SymmetricDistributionTransformer,
 )
 from spforge.performance_transformers._performances_transformers import (
@@ -355,3 +357,197 @@ def test_symmetric_distribution_transformer_with_granularity_fit_transform():
         abs(transformed_df.loc[lambda x: x.position == "SG"]["performance"].skew())
         < transformer.skewness_allowed
     )
+
+
+class TestQuantilePerformanceScaler:
+    @pytest.fixture
+    def zero_inflated_data(self):
+        """Create zero-inflated data with ~37.7% zeros."""
+        np.random.seed(42)
+        n = 1000
+        # ~37.7% zeros
+        zeros = np.zeros(377)
+        # Non-zeros from exponential distribution
+        nonzeros = np.random.exponential(scale=2, size=n - 377)
+        raw = np.concatenate([zeros, nonzeros])
+        np.random.shuffle(raw)
+        return raw
+
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_zeros_map_to_midpoint(self, df_type, zero_inflated_data):
+        """Test that zeros map to π/2 (midpoint of zero probability mass)."""
+        df = df_type({"performance": zero_inflated_data})
+
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        if isinstance(transformed, pd.DataFrame):
+            scaled = transformed["performance"].values
+        else:
+            scaled = transformed["performance"].to_numpy()
+
+        pi = scaler._zero_proportion["performance"]
+        is_zero = np.abs(zero_inflated_data) < 1e-10
+
+        # Zeros should map to π/2
+        assert np.allclose(scaled[is_zero], pi / 2, atol=1e-10)
+
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_mean_approximately_half(self, df_type, zero_inflated_data):
+        """Test that mean ≈ 0.5."""
+        df = df_type({"performance": zero_inflated_data})
+
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        if isinstance(transformed, pd.DataFrame):
+            scaled = transformed["performance"].values
+        else:
+            scaled = transformed["performance"].to_numpy()
+
+        # Mean should be approximately 0.5
+        assert abs(np.mean(scaled) - 0.5) < 0.02
+
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_monotonicity_preserved(self, df_type, zero_inflated_data):
+        """Test that monotonicity is preserved (sorted input → sorted output)."""
+        df = df_type({"performance": zero_inflated_data})
+
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        if isinstance(transformed, pd.DataFrame):
+            scaled = transformed["performance"].values
+        else:
+            scaled = transformed["performance"].to_numpy()
+
+        # Check monotonicity: if we sort the raw data, the scaled values should also be sorted
+        order = np.argsort(zero_inflated_data)
+        sorted_scaled = scaled[order]
+        # Allow for tiny numerical errors
+        assert np.all(np.diff(sorted_scaled) >= -1e-10)
+
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_bounded_zero_one(self, df_type, zero_inflated_data):
+        """Test that output is bounded [0, 1]."""
+        df = df_type({"performance": zero_inflated_data})
+
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        if isinstance(transformed, pd.DataFrame):
+            scaled = transformed["performance"].values
+        else:
+            scaled = transformed["performance"].to_numpy()
+
+        assert np.all((scaled >= 0) & (scaled <= 1))
+
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_nonzeros_span_pi_to_one(self, df_type, zero_inflated_data):
+        """Test that non-zeros map to range (π, 1)."""
+        df = df_type({"performance": zero_inflated_data})
+
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        if isinstance(transformed, pd.DataFrame):
+            scaled = transformed["performance"].values
+        else:
+            scaled = transformed["performance"].to_numpy()
+
+        pi = scaler._zero_proportion["performance"]
+        is_nonzero = np.abs(zero_inflated_data) >= 1e-10
+
+        # Non-zeros should be >= π
+        assert np.all(scaled[is_nonzero] >= pi - 1e-10)
+        # Non-zeros should be <= 1
+        assert np.all(scaled[is_nonzero] <= 1 + 1e-10)
+
+    def test_with_prefix(self):
+        """Test that prefix is applied correctly."""
+        np.random.seed(42)
+        raw = np.concatenate([np.zeros(50), np.random.exponential(2, 50)])
+        df = pd.DataFrame({"feat": raw})
+
+        scaler = QuantilePerformanceScaler(features=["feat"], prefix="scaled_")
+        transformed = scaler.fit_transform(df)
+
+        assert "scaled_feat" in transformed.columns
+        assert scaler.features_out == ["scaled_feat"]
+
+    def test_multiple_features(self):
+        """Test that multiple features are handled correctly."""
+        np.random.seed(42)
+        raw_a = np.concatenate([np.zeros(50), np.random.exponential(2, 50)])
+        raw_b = np.concatenate([np.zeros(30), np.random.exponential(3, 70)])
+        df = pd.DataFrame({"a": raw_a, "b": raw_b})
+
+        scaler = QuantilePerformanceScaler(features=["a", "b"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        assert "a" in transformed.columns
+        assert "b" in transformed.columns
+
+        # Both should have mean ≈ 0.5
+        assert abs(transformed["a"].mean() - 0.5) < 0.05
+        assert abs(transformed["b"].mean() - 0.5) < 0.05
+
+    def test_all_zeros(self):
+        """Test edge case: all values are zero (π=1)."""
+        df = pd.DataFrame({"x": [0.0, 0.0, 0.0, 0.0, 0.0]})
+
+        scaler = QuantilePerformanceScaler(features=["x"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        # π=1, so all values should map to π/2 = 0.5
+        assert np.allclose(transformed["x"].values, 0.5)
+        assert scaler._zero_proportion["x"] == 1.0
+
+    def test_no_zeros(self):
+        """Test edge case: no zeros (π=0)."""
+        np.random.seed(42)
+        df = pd.DataFrame({"x": np.random.exponential(2, 100) + 0.1})  # All positive
+
+        scaler = QuantilePerformanceScaler(features=["x"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        # π=0, so values should span (0, 1) via quantiles
+        assert scaler._zero_proportion["x"] == 0.0
+        assert transformed["x"].min() >= 0
+        assert transformed["x"].max() <= 1
+        # Mean should still be ~0.5
+        assert abs(transformed["x"].mean() - 0.5) < 0.05
+
+    def test_nan_handling(self):
+        """Test that NaN values are preserved in output."""
+        df = pd.DataFrame({"x": [0.0, 1.0, np.nan, 2.0, 0.0, np.nan, 3.0]})
+
+        scaler = QuantilePerformanceScaler(features=["x"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        # NaN positions should remain NaN
+        assert np.isnan(transformed["x"].iloc[2])
+        assert np.isnan(transformed["x"].iloc[5])
+
+        # Non-NaN values should be valid
+        non_nan_mask = ~np.isnan(transformed["x"].values)
+        assert np.all((transformed["x"].values[non_nan_mask] >= 0) &
+                      (transformed["x"].values[non_nan_mask] <= 1))
+
+    def test_single_unique_nonzero(self):
+        """Test edge case: single unique non-zero value."""
+        df = pd.DataFrame({"x": [0.0, 0.0, 5.0, 5.0, 0.0, 5.0]})
+
+        scaler = QuantilePerformanceScaler(features=["x"], prefix="")
+        transformed = scaler.fit_transform(df)
+
+        # Should still work - zeros map to π/2, non-zeros to (π, 1)
+        pi = scaler._zero_proportion["x"]
+        is_zero = df["x"] == 0
+
+        # Zeros should map to π/2
+        assert np.allclose(transformed["x"].values[is_zero.values], pi / 2)
+
+        # Non-zeros should all map to same value (since they're all equal)
+        nonzero_values = transformed["x"].values[~is_zero.values]
+        assert np.allclose(nonzero_values, nonzero_values[0])
tests/ratings/test_player_rating_generator.py CHANGED
@@ -1951,9 +1951,11 @@ def test_fit_transform__player_rating_difference_from_team_projected_feature(bas
     assert player_col in result.columns
     assert team_col in result.columns
 
-
-
-
+    # Verify diff = player - team (vectorized)
+    max_diff = result.select(
+        (pl.col(diff_col) - (pl.col(player_col) - pl.col(team_col))).abs().max()
+    ).item()
+    assert max_diff < 1e-9
 
 
 def test_fit_transform__start_league_quantile_uses_existing_player_ratings(base_cn):
@@ -2909,3 +2911,30 @@ def test_fit_transform_null_perf_with_use_off_def_split_false__no_crash(base_cn)
     # TypeError: float() argument must be a string or a number, not 'NoneType'
     result = gen.fit_transform(df)
     assert result is not None
+
+
+def test_player_opponent_mean_projected_feature(base_cn, sample_df):
+    """Test that PLAYER_OPPONENT_MEAN_PROJECTED outputs mean of player and opponent team ratings."""
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        auto_scale_performance=True,
+        features_out=[
+            RatingKnownFeatures.PLAYER_RATING,
+            RatingKnownFeatures.OPPONENT_RATING_PROJECTED,
+            RatingKnownFeatures.PLAYER_OPPONENT_MEAN_PROJECTED,
+        ],
+    )
+    result = gen.fit_transform(sample_df)
+
+    # Verify column exists
+    assert "player_opponent_mean_projected_perf" in result.columns
+
+    # Verify it's the mean of player_rating and opponent_rating_projected (vectorized)
+    expected = (
+        pl.col("player_rating_perf") + pl.col("opponent_rating_projected_perf")
+    ) / 2
+    diff = result.select(
+        (pl.col("player_opponent_mean_projected_perf") - expected).abs().max()
+    ).item()
+    assert diff < 1e-6, f"Max difference from expected mean: {diff}"