spforge 0.8.37__py3-none-any.whl → 0.8.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- spforge/performance_transformers/_performance_manager.py +5 -0
- spforge/performance_transformers/_performances_transformers.py +72 -6
- spforge/ratings/_base.py +32 -1
- {spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/METADATA +1 -1
- {spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/RECORD +10 -10
- tests/performance_transformers/test_performance_manager.py +45 -0
- tests/performance_transformers/test_performances_transformers.py +38 -0
- {spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/WHEEL +0 -0
- {spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/top_level.txt +0 -0
|
@@ -89,6 +89,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
|
|
|
89
89
|
min_value: float = 0.0,
|
|
90
90
|
max_value: float = 1.0,
|
|
91
91
|
zero_inflation_threshold: float = 0.15,
|
|
92
|
+
quantile_weight_column: str | None = None,
|
|
92
93
|
):
|
|
93
94
|
self.features = features
|
|
94
95
|
self.prefix = prefix
|
|
@@ -106,6 +107,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
|
|
|
106
107
|
self.min_value = min_value
|
|
107
108
|
self.max_value = max_value
|
|
108
109
|
self.zero_inflation_threshold = zero_inflation_threshold
|
|
110
|
+
self.quantile_weight_column = quantile_weight_column
|
|
109
111
|
|
|
110
112
|
self.transformers = create_performance_scalers_transformers(
|
|
111
113
|
transformer_names=self.transformer_names,
|
|
@@ -150,6 +152,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
|
|
|
150
152
|
QuantilePerformanceScaler(
|
|
151
153
|
features=prefixed_features,
|
|
152
154
|
prefix="",
|
|
155
|
+
weight_column=self.quantile_weight_column,
|
|
153
156
|
)
|
|
154
157
|
]
|
|
155
158
|
break
|
|
@@ -214,6 +217,7 @@ class PerformanceWeightsManager(PerformanceManager):
|
|
|
214
217
|
prefix: str = "performance__",
|
|
215
218
|
return_all_features: bool = False,
|
|
216
219
|
zero_inflation_threshold: float = 0.15,
|
|
220
|
+
quantile_weight_column: str | None = None,
|
|
217
221
|
):
|
|
218
222
|
self.weights = weights
|
|
219
223
|
self.return_all_features = return_all_features
|
|
@@ -227,6 +231,7 @@ class PerformanceWeightsManager(PerformanceManager):
|
|
|
227
231
|
min_value=min_value,
|
|
228
232
|
performance_column=performance_column,
|
|
229
233
|
zero_inflation_threshold=zero_inflation_threshold,
|
|
234
|
+
quantile_weight_column=quantile_weight_column,
|
|
230
235
|
)
|
|
231
236
|
|
|
232
237
|
@nw.narwhalify
|
|
@@ -432,6 +432,9 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
|
|
|
432
432
|
- Non-zeros → uniform on (π, 1) via empirical CDF
|
|
433
433
|
|
|
434
434
|
Fast: O(n log n) for fit, O(n) for transform.
|
|
435
|
+
|
|
436
|
+
If weight_column is provided, weighted quantiles are computed so that
|
|
437
|
+
the scaling respects participation weights (e.g., minutes played).
|
|
435
438
|
"""
|
|
436
439
|
|
|
437
440
|
def __init__(
|
|
@@ -440,11 +443,13 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
|
|
|
440
443
|
zero_threshold: float = 1e-10,
|
|
441
444
|
n_quantiles: int = 1000,
|
|
442
445
|
prefix: str = "",
|
|
446
|
+
weight_column: str | None = None,
|
|
443
447
|
):
|
|
444
448
|
self.features = features
|
|
445
449
|
self.zero_threshold = zero_threshold
|
|
446
450
|
self.n_quantiles = n_quantiles
|
|
447
451
|
self.prefix = prefix
|
|
452
|
+
self.weight_column = weight_column
|
|
448
453
|
self.features_out = [self.prefix + f for f in self.features]
|
|
449
454
|
|
|
450
455
|
self._zero_proportion: dict[str, float] = {}
|
|
@@ -452,21 +457,82 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
|
|
|
452
457
|
|
|
453
458
|
@nw.narwhalify
|
|
454
459
|
def fit(self, df: IntoFrameT, y=None):
|
|
460
|
+
# Get weights if specified
|
|
461
|
+
weights = None
|
|
462
|
+
if self.weight_column is not None:
|
|
463
|
+
weights = df[self.weight_column].to_numpy()
|
|
464
|
+
|
|
455
465
|
for feature in self.features:
|
|
456
466
|
values = df[feature].to_numpy()
|
|
457
|
-
values = values[np.isfinite(values)]
|
|
458
467
|
|
|
459
|
-
|
|
460
|
-
|
|
468
|
+
# Create finite mask
|
|
469
|
+
finite_mask = np.isfinite(values)
|
|
470
|
+
if weights is not None:
|
|
471
|
+
# Also require finite, positive weights
|
|
472
|
+
weight_valid = np.isfinite(weights) & (weights > 0)
|
|
473
|
+
finite_mask = finite_mask & weight_valid
|
|
474
|
+
|
|
475
|
+
values_finite = values[finite_mask]
|
|
476
|
+
|
|
477
|
+
if weights is not None:
|
|
478
|
+
weights_finite = weights[finite_mask]
|
|
479
|
+
else:
|
|
480
|
+
weights_finite = None
|
|
481
|
+
|
|
482
|
+
is_zero = np.abs(values_finite) < self.zero_threshold
|
|
483
|
+
|
|
484
|
+
if weights_finite is not None:
|
|
485
|
+
# Weighted zero proportion: sum(weights where zero) / sum(weights)
|
|
486
|
+
total_weight = np.sum(weights_finite)
|
|
487
|
+
if total_weight > 0:
|
|
488
|
+
self._zero_proportion[feature] = np.sum(weights_finite[is_zero]) / total_weight
|
|
489
|
+
else:
|
|
490
|
+
self._zero_proportion[feature] = np.mean(is_zero)
|
|
491
|
+
else:
|
|
492
|
+
self._zero_proportion[feature] = np.mean(is_zero)
|
|
493
|
+
|
|
494
|
+
nonzero_mask = ~is_zero
|
|
495
|
+
nonzero_values = values_finite[nonzero_mask]
|
|
461
496
|
|
|
462
|
-
nonzero_values = values[~is_zero]
|
|
463
497
|
if len(nonzero_values) > 0:
|
|
464
|
-
|
|
465
|
-
|
|
498
|
+
if weights_finite is not None:
|
|
499
|
+
# Weighted quantiles using interpolation on weighted CDF
|
|
500
|
+
nonzero_weights = weights_finite[nonzero_mask]
|
|
501
|
+
self._nonzero_quantiles[feature] = self._compute_weighted_quantiles(
|
|
502
|
+
nonzero_values, nonzero_weights
|
|
503
|
+
)
|
|
504
|
+
else:
|
|
505
|
+
percentiles = np.linspace(0, 100, self.n_quantiles + 1)
|
|
506
|
+
self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
|
|
466
507
|
else:
|
|
467
508
|
self._nonzero_quantiles[feature] = None
|
|
468
509
|
return self
|
|
469
510
|
|
|
511
|
+
def _compute_weighted_quantiles(
|
|
512
|
+
self, values: np.ndarray, weights: np.ndarray
|
|
513
|
+
) -> np.ndarray:
|
|
514
|
+
"""Compute weighted quantiles using weighted CDF interpolation."""
|
|
515
|
+
# Sort by value
|
|
516
|
+
order = np.argsort(values)
|
|
517
|
+
sorted_values = values[order]
|
|
518
|
+
sorted_weights = weights[order]
|
|
519
|
+
|
|
520
|
+
# Compute weighted CDF
|
|
521
|
+
cumulative_weights = np.cumsum(sorted_weights)
|
|
522
|
+
total_weight = cumulative_weights[-1]
|
|
523
|
+
|
|
524
|
+
# Normalize CDF to [0, 1]
|
|
525
|
+
cdf = cumulative_weights / total_weight
|
|
526
|
+
|
|
527
|
+
# Sample quantiles at evenly spaced CDF positions
|
|
528
|
+
target_cdf = np.linspace(0, 1, self.n_quantiles + 1)
|
|
529
|
+
|
|
530
|
+
# Interpolate to get quantile values
|
|
531
|
+
# Use np.interp which handles edge cases gracefully
|
|
532
|
+
quantiles = np.interp(target_cdf, cdf, sorted_values)
|
|
533
|
+
|
|
534
|
+
return quantiles
|
|
535
|
+
|
|
470
536
|
@nw.narwhalify
|
|
471
537
|
def transform(self, df: IntoFrameT) -> IntoFrameT:
|
|
472
538
|
for feature in self.features:
|
spforge/ratings/_base.py
CHANGED
|
@@ -7,6 +7,7 @@ from abc import abstractmethod
|
|
|
7
7
|
from typing import Any, Literal
|
|
8
8
|
|
|
9
9
|
import narwhals.stable.v2 as nw
|
|
10
|
+
import numpy as np
|
|
10
11
|
import polars as pl
|
|
11
12
|
from narwhals.stable.v2 import DataFrame
|
|
12
13
|
from narwhals.stable.v2.typing import IntoFrameT
|
|
@@ -149,6 +150,17 @@ class RatingGenerator(FeatureGenerator):
|
|
|
149
150
|
|
|
150
151
|
if self.performance_manager:
|
|
151
152
|
if self.performance_manager:
|
|
153
|
+
# Wire in participation weight column for weighted quantile scaling
|
|
154
|
+
# This ensures zero-inflated distributions use weights for calibration
|
|
155
|
+
if (
|
|
156
|
+
self.column_names
|
|
157
|
+
and self.column_names.participation_weight
|
|
158
|
+
and self.column_names.participation_weight in df.columns
|
|
159
|
+
):
|
|
160
|
+
self.performance_manager.quantile_weight_column = (
|
|
161
|
+
self.column_names.participation_weight
|
|
162
|
+
)
|
|
163
|
+
|
|
152
164
|
ori_perf_values = df[self.performance_manager.ori_performance_column].to_list()
|
|
153
165
|
df = nw.from_native(self.performance_manager.fit_transform(df))
|
|
154
166
|
assert (
|
|
@@ -165,7 +177,26 @@ class RatingGenerator(FeatureGenerator):
|
|
|
165
177
|
"Either transform it manually or set auto_scale_performance to True"
|
|
166
178
|
)
|
|
167
179
|
|
|
168
|
-
|
|
180
|
+
# Use weighted mean when weighted quantile scaling is active
|
|
181
|
+
# because the weighted mean is what's calibrated to 0.5
|
|
182
|
+
if (
|
|
183
|
+
self.performance_manager
|
|
184
|
+
and self.performance_manager._using_quantile_scaler
|
|
185
|
+
and self.performance_manager.quantile_weight_column
|
|
186
|
+
and self.performance_manager.quantile_weight_column in df.columns
|
|
187
|
+
):
|
|
188
|
+
weights = df[self.performance_manager.quantile_weight_column]
|
|
189
|
+
valid_mask = perf.is_finite() & weights.is_finite() & (weights > 0)
|
|
190
|
+
if valid_mask.sum() > 0:
|
|
191
|
+
perf_values = perf.filter(valid_mask).to_numpy()
|
|
192
|
+
weight_values = weights.filter(valid_mask).to_numpy()
|
|
193
|
+
mean_val = float(np.average(perf_values, weights=weight_values))
|
|
194
|
+
else:
|
|
195
|
+
mean_val = float(finite_perf.mean())
|
|
196
|
+
else:
|
|
197
|
+
mean_val = float(finite_perf.mean())
|
|
198
|
+
|
|
199
|
+
if mean_val < 0.42 or mean_val > 0.58:
|
|
169
200
|
raise ValueError(
|
|
170
201
|
f"Mean {self.performance_column} must be between 0.42 and 0.58. "
|
|
171
202
|
"Either transform it manually or set auto_scale_performance to True"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.37
|
|
3
|
+
Version: 0.8.39
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -47,10 +47,10 @@ spforge/hyperparameter_tuning/__init__.py,sha256=Vcl8rVlJ7M708iPgqe4XxpZWgJKGux0
|
|
|
47
47
|
spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=SjwXLpvYIu_JY8uPRHeL5Kgp1aa0slWDz8qsKDaohWQ,8020
|
|
48
48
|
spforge/hyperparameter_tuning/_tuner.py,sha256=M79q3saM6r0UZJsRUUgfdDr-3Qii-F2-wuSAZLFtZDo,19246
|
|
49
49
|
spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAbuKK9M9xHq7IGlQU,482
|
|
50
|
-
spforge/performance_transformers/_performance_manager.py,sha256=
|
|
51
|
-
spforge/performance_transformers/_performances_transformers.py,sha256=
|
|
50
|
+
spforge/performance_transformers/_performance_manager.py,sha256=lh7enqYLd1lXj1VTOiK5N880xkil5q1jRsM51fe_K5g,12322
|
|
51
|
+
spforge/performance_transformers/_performances_transformers.py,sha256=nmjJTEH86JjFneWsnSWIYnUXQoUDskOraDO3VtuufIY,20931
|
|
52
52
|
spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
|
|
53
|
-
spforge/ratings/_base.py,sha256=
|
|
53
|
+
spforge/ratings/_base.py,sha256=Stl_Y2gjQfS1jq_6CfeRG_e3R5Pei34WETdG6CaibGs,16487
|
|
54
54
|
spforge/ratings/_player_rating.py,sha256=AIpDEl6cZaC3urcY-jFFgUWd4WZ71A33c5mOPfkXdMs,68178
|
|
55
55
|
spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
|
|
56
56
|
spforge/ratings/enums.py,sha256=maG0X4WMQeMVAc2wbceq1an-U-z8moZGeG2BAgfICDA,1809
|
|
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
|
|
|
71
71
|
spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
|
|
72
72
|
spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
|
|
73
73
|
spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
|
|
74
|
-
spforge-0.8.37.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
|
+
spforge-0.8.39.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
75
75
|
tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
|
|
76
76
|
tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
|
|
77
77
|
tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
|
|
@@ -92,8 +92,8 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
|
|
|
92
92
|
tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
|
|
93
93
|
tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
|
|
94
94
|
tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
|
|
95
|
-
tests/performance_transformers/test_performance_manager.py,sha256=
|
|
96
|
-
tests/performance_transformers/test_performances_transformers.py,sha256=
|
|
95
|
+
tests/performance_transformers/test_performance_manager.py,sha256=Ob4s86hdnR_4RC9ZG3lpB5O4Gysr2cLyTmCsO6uWomc,21244
|
|
96
|
+
tests/performance_transformers/test_performances_transformers.py,sha256=2OLpFgBolU8e-1Pga3hiOGWWHhjYpfx8Qrf9YXiqjUw,20919
|
|
97
97
|
tests/ratings/test_player_rating_generator.py,sha256=1Pkx0H8xJMTeLc2Fu9zJcoDpBWiY2zCVSxuBFJk2uEs,110717
|
|
98
98
|
tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
|
|
99
99
|
tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
|
|
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
108
108
|
tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
|
|
109
109
|
tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
|
|
110
110
|
tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
|
|
111
|
-
spforge-0.8.
|
|
112
|
-
spforge-0.8.
|
|
113
|
-
spforge-0.8.
|
|
114
|
-
spforge-0.8.
|
|
111
|
+
spforge-0.8.39.dist-info/METADATA,sha256=njbTQ33nwPOZ71PhHQDxUWZzP4MjSavx8sT-JgK2fio,20048
|
|
112
|
+
spforge-0.8.39.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
113
|
+
spforge-0.8.39.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
114
|
+
spforge-0.8.39.dist-info/RECORD,,
|
|
@@ -436,6 +436,51 @@ class TestZeroInflationHandling:
|
|
|
436
436
|
assert manager._using_quantile_scaler is True
|
|
437
437
|
|
|
438
438
|
|
|
439
|
+
class TestWeightedQuantileScaling:
|
|
440
|
+
"""Test that RatingGenerator wires participation weights to quantile scaling."""
|
|
441
|
+
|
|
442
|
+
def test_rating_generator_wires_weight_column(self):
|
|
443
|
+
"""
|
|
444
|
+
RatingGenerator should automatically wire participation_weight to
|
|
445
|
+
quantile_weight_column when using auto_scale_performance with zero-inflated data.
|
|
446
|
+
"""
|
|
447
|
+
from spforge import ColumnNames
|
|
448
|
+
from spforge.ratings import PlayerRatingGenerator
|
|
449
|
+
|
|
450
|
+
np.random.seed(42)
|
|
451
|
+
data = {"player_id": [], "team_id": [], "match_id": [], "start_date": [], "perf": [], "minutes": []}
|
|
452
|
+
|
|
453
|
+
for match_idx in range(50):
|
|
454
|
+
date = f"2024-{(match_idx // 28) + 1:02d}-{(match_idx % 28) + 1:02d}"
|
|
455
|
+
for team_idx in range(2):
|
|
456
|
+
for player_idx in range(5):
|
|
457
|
+
minutes = min(np.random.exponential(scale=20) + 5, 48)
|
|
458
|
+
# Zero-inflated: high-minutes players more likely non-zero
|
|
459
|
+
zero_prob = 0.7 - 0.5 * (minutes / 48)
|
|
460
|
+
perf = 0.0 if np.random.random() < zero_prob else np.random.exponential(0.1)
|
|
461
|
+
|
|
462
|
+
data["player_id"].append(f"P{team_idx}_{player_idx}")
|
|
463
|
+
data["team_id"].append(f"T{team_idx}")
|
|
464
|
+
data["match_id"].append(f"M{match_idx}")
|
|
465
|
+
data["start_date"].append(date)
|
|
466
|
+
data["perf"].append(perf)
|
|
467
|
+
data["minutes"].append(minutes / 48)
|
|
468
|
+
|
|
469
|
+
cn = ColumnNames(
|
|
470
|
+
player_id="player_id", team_id="team_id", match_id="match_id",
|
|
471
|
+
start_date="start_date", update_match_id="match_id", participation_weight="minutes",
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
gen = PlayerRatingGenerator(performance_column="perf", column_names=cn, auto_scale_performance=True)
|
|
475
|
+
gen.fit_transform(pl.DataFrame(data))
|
|
476
|
+
|
|
477
|
+
pm = gen.performance_manager
|
|
478
|
+
if pm._using_quantile_scaler:
|
|
479
|
+
assert pm.transformers[-1].weight_column == "minutes", (
|
|
480
|
+
"RatingGenerator should wire quantile_weight_column to participation_weight"
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
|
|
439
484
|
class TestAutoScalePerformanceBounds:
|
|
440
485
|
"""Tests for ensuring scaled performance stays within [0, 1] bounds."""
|
|
441
486
|
|
|
@@ -551,3 +551,41 @@ class TestQuantilePerformanceScaler:
|
|
|
551
551
|
# Non-zeros should all map to same value (since they're all equal)
|
|
552
552
|
nonzero_values = transformed["x"].values[~is_zero.values]
|
|
553
553
|
assert np.allclose(nonzero_values, nonzero_values[0])
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
class TestWeightedQuantilePerformanceScaler:
|
|
557
|
+
"""Tests for weighted quantile scaling algorithm."""
|
|
558
|
+
|
|
559
|
+
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
560
|
+
def test_weighted_mean_alignment(self, df_type):
|
|
561
|
+
"""Weighted scaling should produce weighted mean close to 0.5."""
|
|
562
|
+
np.random.seed(42)
|
|
563
|
+
n = 1000
|
|
564
|
+
weights = np.random.exponential(scale=20, size=n) + 1
|
|
565
|
+
values = []
|
|
566
|
+
for w in weights:
|
|
567
|
+
zero_prob = 0.6 - 0.4 * (w / weights.max())
|
|
568
|
+
values.append(0.0 if np.random.random() < zero_prob else np.random.exponential(scale=2))
|
|
569
|
+
|
|
570
|
+
df = df_type({"performance": np.array(values), "weight": weights})
|
|
571
|
+
scaler = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column="weight")
|
|
572
|
+
result = scaler.fit_transform(df)
|
|
573
|
+
|
|
574
|
+
scaled = result["performance"].values if isinstance(result, pd.DataFrame) else result["performance"].to_numpy()
|
|
575
|
+
weighted_mean = np.average(scaled, weights=weights)
|
|
576
|
+
assert abs(weighted_mean - 0.5) < 0.02
|
|
577
|
+
|
|
578
|
+
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
579
|
+
def test_backward_compatibility_without_weights(self, df_type):
|
|
580
|
+
"""weight_column=None should match original unweighted behavior."""
|
|
581
|
+
np.random.seed(42)
|
|
582
|
+
raw = np.concatenate([np.zeros(200), np.random.exponential(scale=2, size=300)])
|
|
583
|
+
np.random.shuffle(raw)
|
|
584
|
+
df = df_type({"performance": raw})
|
|
585
|
+
|
|
586
|
+
result1 = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column=None).fit_transform(df)
|
|
587
|
+
result2 = QuantilePerformanceScaler(features=["performance"], prefix="").fit_transform(df)
|
|
588
|
+
|
|
589
|
+
v1 = result1["performance"].values if isinstance(result1, pd.DataFrame) else result1["performance"].to_numpy()
|
|
590
|
+
v2 = result2["performance"].values if isinstance(result2, pd.DataFrame) else result2["performance"].to_numpy()
|
|
591
|
+
assert np.allclose(v1, v2, atol=1e-10)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|