spforge 0.8.38__py3-none-any.whl → 0.8.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- spforge/ratings/_base.py +32 -1
- {spforge-0.8.38.dist-info → spforge-0.8.39.dist-info}/METADATA +1 -1
- {spforge-0.8.38.dist-info → spforge-0.8.39.dist-info}/RECORD +8 -8
- tests/performance_transformers/test_performance_manager.py +36 -131
- tests/performance_transformers/test_performances_transformers.py +20 -193
- {spforge-0.8.38.dist-info → spforge-0.8.39.dist-info}/WHEEL +0 -0
- {spforge-0.8.38.dist-info → spforge-0.8.39.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.38.dist-info → spforge-0.8.39.dist-info}/top_level.txt +0 -0
spforge/ratings/_base.py
CHANGED
|
@@ -7,6 +7,7 @@ from abc import abstractmethod
|
|
|
7
7
|
from typing import Any, Literal
|
|
8
8
|
|
|
9
9
|
import narwhals.stable.v2 as nw
|
|
10
|
+
import numpy as np
|
|
10
11
|
import polars as pl
|
|
11
12
|
from narwhals.stable.v2 import DataFrame
|
|
12
13
|
from narwhals.stable.v2.typing import IntoFrameT
|
|
@@ -149,6 +150,17 @@ class RatingGenerator(FeatureGenerator):
|
|
|
149
150
|
|
|
150
151
|
if self.performance_manager:
|
|
151
152
|
if self.performance_manager:
|
|
153
|
+
# Wire in participation weight column for weighted quantile scaling
|
|
154
|
+
# This ensures zero-inflated distributions use weights for calibration
|
|
155
|
+
if (
|
|
156
|
+
self.column_names
|
|
157
|
+
and self.column_names.participation_weight
|
|
158
|
+
and self.column_names.participation_weight in df.columns
|
|
159
|
+
):
|
|
160
|
+
self.performance_manager.quantile_weight_column = (
|
|
161
|
+
self.column_names.participation_weight
|
|
162
|
+
)
|
|
163
|
+
|
|
152
164
|
ori_perf_values = df[self.performance_manager.ori_performance_column].to_list()
|
|
153
165
|
df = nw.from_native(self.performance_manager.fit_transform(df))
|
|
154
166
|
assert (
|
|
@@ -165,7 +177,26 @@ class RatingGenerator(FeatureGenerator):
|
|
|
165
177
|
"Either transform it manually or set auto_scale_performance to True"
|
|
166
178
|
)
|
|
167
179
|
|
|
168
|
-
|
|
180
|
+
# Use weighted mean when weighted quantile scaling is active
|
|
181
|
+
# because the weighted mean is what's calibrated to 0.5
|
|
182
|
+
if (
|
|
183
|
+
self.performance_manager
|
|
184
|
+
and self.performance_manager._using_quantile_scaler
|
|
185
|
+
and self.performance_manager.quantile_weight_column
|
|
186
|
+
and self.performance_manager.quantile_weight_column in df.columns
|
|
187
|
+
):
|
|
188
|
+
weights = df[self.performance_manager.quantile_weight_column]
|
|
189
|
+
valid_mask = perf.is_finite() & weights.is_finite() & (weights > 0)
|
|
190
|
+
if valid_mask.sum() > 0:
|
|
191
|
+
perf_values = perf.filter(valid_mask).to_numpy()
|
|
192
|
+
weight_values = weights.filter(valid_mask).to_numpy()
|
|
193
|
+
mean_val = float(np.average(perf_values, weights=weight_values))
|
|
194
|
+
else:
|
|
195
|
+
mean_val = float(finite_perf.mean())
|
|
196
|
+
else:
|
|
197
|
+
mean_val = float(finite_perf.mean())
|
|
198
|
+
|
|
199
|
+
if mean_val < 0.42 or mean_val > 0.58:
|
|
169
200
|
raise ValueError(
|
|
170
201
|
f"Mean {self.performance_column} must be between 0.42 and 0.58. "
|
|
171
202
|
"Either transform it manually or set auto_scale_performance to True"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.38
|
|
3
|
+
Version: 0.8.39
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -50,7 +50,7 @@ spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAb
|
|
|
50
50
|
spforge/performance_transformers/_performance_manager.py,sha256=lh7enqYLd1lXj1VTOiK5N880xkil5q1jRsM51fe_K5g,12322
|
|
51
51
|
spforge/performance_transformers/_performances_transformers.py,sha256=nmjJTEH86JjFneWsnSWIYnUXQoUDskOraDO3VtuufIY,20931
|
|
52
52
|
spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
|
|
53
|
-
spforge/ratings/_base.py,sha256=
|
|
53
|
+
spforge/ratings/_base.py,sha256=Stl_Y2gjQfS1jq_6CfeRG_e3R5Pei34WETdG6CaibGs,16487
|
|
54
54
|
spforge/ratings/_player_rating.py,sha256=AIpDEl6cZaC3urcY-jFFgUWd4WZ71A33c5mOPfkXdMs,68178
|
|
55
55
|
spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
|
|
56
56
|
spforge/ratings/enums.py,sha256=maG0X4WMQeMVAc2wbceq1an-U-z8moZGeG2BAgfICDA,1809
|
|
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
|
|
|
71
71
|
spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
|
|
72
72
|
spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
|
|
73
73
|
spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
|
|
74
|
-
spforge-0.8.38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
|
+
spforge-0.8.39.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
75
75
|
tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
|
|
76
76
|
tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
|
|
77
77
|
tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
|
|
@@ -92,8 +92,8 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
|
|
|
92
92
|
tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
|
|
93
93
|
tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
|
|
94
94
|
tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
|
|
95
|
-
tests/performance_transformers/test_performance_manager.py,sha256=
|
|
96
|
-
tests/performance_transformers/test_performances_transformers.py,sha256=
|
|
95
|
+
tests/performance_transformers/test_performance_manager.py,sha256=Ob4s86hdnR_4RC9ZG3lpB5O4Gysr2cLyTmCsO6uWomc,21244
|
|
96
|
+
tests/performance_transformers/test_performances_transformers.py,sha256=2OLpFgBolU8e-1Pga3hiOGWWHhjYpfx8Qrf9YXiqjUw,20919
|
|
97
97
|
tests/ratings/test_player_rating_generator.py,sha256=1Pkx0H8xJMTeLc2Fu9zJcoDpBWiY2zCVSxuBFJk2uEs,110717
|
|
98
98
|
tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
|
|
99
99
|
tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
|
|
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
108
108
|
tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
|
|
109
109
|
tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
|
|
110
110
|
tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
|
|
111
|
-
spforge-0.8.
|
|
112
|
-
spforge-0.8.
|
|
113
|
-
spforge-0.8.
|
|
114
|
-
spforge-0.8.
|
|
111
|
+
spforge-0.8.39.dist-info/METADATA,sha256=njbTQ33nwPOZ71PhHQDxUWZzP4MjSavx8sT-JgK2fio,20048
|
|
112
|
+
spforge-0.8.39.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
113
|
+
spforge-0.8.39.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
114
|
+
spforge-0.8.39.dist-info/RECORD,,
|
|
@@ -437,143 +437,48 @@ class TestZeroInflationHandling:
|
|
|
437
437
|
|
|
438
438
|
|
|
439
439
|
class TestWeightedQuantileScaling:
|
|
440
|
-
"""
|
|
440
|
+
"""Test that RatingGenerator wires participation weights to quantile scaling."""
|
|
441
441
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
weights = np.random.exponential(scale=20, size=n) + 1
|
|
450
|
-
|
|
451
|
-
# High-weight rows have lower zero probability
|
|
452
|
-
values = []
|
|
453
|
-
for w in weights:
|
|
454
|
-
zero_prob = 0.6 - 0.4 * (w / weights.max())
|
|
455
|
-
if np.random.random() < zero_prob:
|
|
456
|
-
values.append(0.0)
|
|
457
|
-
else:
|
|
458
|
-
values.append(np.random.exponential(scale=2))
|
|
459
|
-
|
|
460
|
-
return np.array(values), weights
|
|
461
|
-
|
|
462
|
-
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
463
|
-
def test_performance_manager_with_weight_column(self, frame, weighted_zero_inflated_data):
|
|
464
|
-
"""Test that PerformanceManager passes weight column to QuantilePerformanceScaler."""
|
|
465
|
-
values, weights = weighted_zero_inflated_data
|
|
466
|
-
df = _make_native_df(frame, {"x": values, "minutes": weights})
|
|
467
|
-
|
|
468
|
-
pm = PerformanceManager(
|
|
469
|
-
features=["x"],
|
|
470
|
-
transformer_names=None, # Use defaults, auto-detect zero inflation
|
|
471
|
-
prefix="performance__",
|
|
472
|
-
performance_column="perf",
|
|
473
|
-
zero_inflation_threshold=0.15,
|
|
474
|
-
quantile_weight_column="minutes",
|
|
475
|
-
)
|
|
476
|
-
|
|
477
|
-
pm.fit(df)
|
|
478
|
-
|
|
479
|
-
# Should have switched to quantile scaler
|
|
480
|
-
assert pm._using_quantile_scaler is True
|
|
481
|
-
assert isinstance(pm.transformers[-1], QuantilePerformanceScaler)
|
|
482
|
-
# And should have the weight column set
|
|
483
|
-
assert pm.transformers[-1].weight_column == "minutes"
|
|
484
|
-
|
|
485
|
-
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
486
|
-
def test_weighted_scaling_reduces_weighted_bias(self, frame, weighted_zero_inflated_data):
|
|
487
|
-
"""Test that weighted scaling produces weighted mean closer to 0.5."""
|
|
488
|
-
values, weights = weighted_zero_inflated_data
|
|
489
|
-
df = _make_native_df(frame, {"x": values, "minutes": weights})
|
|
490
|
-
|
|
491
|
-
# With weighted scaling
|
|
492
|
-
pm_weighted = PerformanceManager(
|
|
493
|
-
features=["x"],
|
|
494
|
-
transformer_names=None,
|
|
495
|
-
prefix="performance__",
|
|
496
|
-
performance_column="perf",
|
|
497
|
-
zero_inflation_threshold=0.15,
|
|
498
|
-
quantile_weight_column="minutes",
|
|
499
|
-
)
|
|
500
|
-
|
|
501
|
-
result_weighted = pm_weighted.fit_transform(df)
|
|
502
|
-
result_weighted_nw = nw.from_native(result_weighted)
|
|
503
|
-
scaled_weighted = result_weighted_nw["performance__perf"].to_numpy()
|
|
504
|
-
|
|
505
|
-
# Without weighted scaling
|
|
506
|
-
pm_unweighted = PerformanceManager(
|
|
507
|
-
features=["x"],
|
|
508
|
-
transformer_names=None,
|
|
509
|
-
prefix="performance__",
|
|
510
|
-
performance_column="perf",
|
|
511
|
-
zero_inflation_threshold=0.15,
|
|
512
|
-
quantile_weight_column=None, # No weighting
|
|
513
|
-
)
|
|
514
|
-
|
|
515
|
-
result_unweighted = pm_unweighted.fit_transform(df)
|
|
516
|
-
result_unweighted_nw = nw.from_native(result_unweighted)
|
|
517
|
-
scaled_unweighted = result_unweighted_nw["performance__perf"].to_numpy()
|
|
518
|
-
|
|
519
|
-
# Compute weighted means
|
|
520
|
-
weighted_mean_of_weighted = np.average(scaled_weighted, weights=weights)
|
|
521
|
-
weighted_mean_of_unweighted = np.average(scaled_unweighted, weights=weights)
|
|
522
|
-
|
|
523
|
-
# Weighted scaling should have weighted mean closer to 0.5
|
|
524
|
-
assert abs(weighted_mean_of_weighted - 0.5) < abs(weighted_mean_of_unweighted - 0.5), (
|
|
525
|
-
f"Weighted mean with weighted scaling ({weighted_mean_of_weighted:.4f}) "
|
|
526
|
-
f"should be closer to 0.5 than without ({weighted_mean_of_unweighted:.4f})"
|
|
527
|
-
)
|
|
528
|
-
|
|
529
|
-
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
530
|
-
def test_performance_weights_manager_with_quantile_weight_column(
|
|
531
|
-
self, frame, weighted_zero_inflated_data
|
|
532
|
-
):
|
|
533
|
-
"""Test that PerformanceWeightsManager also supports quantile_weight_column."""
|
|
534
|
-
from spforge.performance_transformers._performance_manager import ColumnWeight
|
|
535
|
-
|
|
536
|
-
values, weights = weighted_zero_inflated_data
|
|
537
|
-
df = _make_native_df(frame, {"feat_a": values, "minutes": weights})
|
|
442
|
+
def test_rating_generator_wires_weight_column(self):
|
|
443
|
+
"""
|
|
444
|
+
RatingGenerator should automatically wire participation_weight to
|
|
445
|
+
quantile_weight_column when using auto_scale_performance with zero-inflated data.
|
|
446
|
+
"""
|
|
447
|
+
from spforge import ColumnNames
|
|
448
|
+
from spforge.ratings import PlayerRatingGenerator
|
|
538
449
|
|
|
539
|
-
column_weights = [ColumnWeight(name="feat_a", weight=1.0)]
|
|
540
|
-
manager = PerformanceWeightsManager(
|
|
541
|
-
weights=column_weights,
|
|
542
|
-
transformer_names=None,
|
|
543
|
-
prefix="",
|
|
544
|
-
zero_inflation_threshold=0.15,
|
|
545
|
-
quantile_weight_column="minutes",
|
|
546
|
-
)
|
|
547
|
-
|
|
548
|
-
manager.fit(df)
|
|
549
|
-
|
|
550
|
-
# Should have switched to quantile scaler with weight column
|
|
551
|
-
assert manager._using_quantile_scaler is True
|
|
552
|
-
assert manager.transformers[-1].weight_column == "minutes"
|
|
553
|
-
|
|
554
|
-
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
555
|
-
def test_weight_column_not_used_when_no_zero_inflation(self, frame):
|
|
556
|
-
"""Test that weight column is not needed when zero inflation is not detected."""
|
|
557
450
|
np.random.seed(42)
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
451
|
+
data = {"player_id": [], "team_id": [], "match_id": [], "start_date": [], "perf": [], "minutes": []}
|
|
452
|
+
|
|
453
|
+
for match_idx in range(50):
|
|
454
|
+
date = f"2024-{(match_idx // 28) + 1:02d}-{(match_idx % 28) + 1:02d}"
|
|
455
|
+
for team_idx in range(2):
|
|
456
|
+
for player_idx in range(5):
|
|
457
|
+
minutes = min(np.random.exponential(scale=20) + 5, 48)
|
|
458
|
+
# Zero-inflated: high-minutes players more likely non-zero
|
|
459
|
+
zero_prob = 0.7 - 0.5 * (minutes / 48)
|
|
460
|
+
perf = 0.0 if np.random.random() < zero_prob else np.random.exponential(0.1)
|
|
461
|
+
|
|
462
|
+
data["player_id"].append(f"P{team_idx}_{player_idx}")
|
|
463
|
+
data["team_id"].append(f"T{team_idx}")
|
|
464
|
+
data["match_id"].append(f"M{match_idx}")
|
|
465
|
+
data["start_date"].append(date)
|
|
466
|
+
data["perf"].append(perf)
|
|
467
|
+
data["minutes"].append(minutes / 48)
|
|
468
|
+
|
|
469
|
+
cn = ColumnNames(
|
|
470
|
+
player_id="player_id", team_id="team_id", match_id="match_id",
|
|
471
|
+
start_date="start_date", update_match_id="match_id", participation_weight="minutes",
|
|
571
472
|
)
|
|
572
473
|
|
|
573
|
-
|
|
474
|
+
gen = PlayerRatingGenerator(performance_column="perf", column_names=cn, auto_scale_performance=True)
|
|
475
|
+
gen.fit_transform(pl.DataFrame(data))
|
|
574
476
|
|
|
575
|
-
|
|
576
|
-
|
|
477
|
+
pm = gen.performance_manager
|
|
478
|
+
if pm._using_quantile_scaler:
|
|
479
|
+
assert pm.transformers[-1].weight_column == "minutes", (
|
|
480
|
+
"RatingGenerator should wire quantile_weight_column to participation_weight"
|
|
481
|
+
)
|
|
577
482
|
|
|
578
483
|
|
|
579
484
|
class TestAutoScalePerformanceBounds:
|
|
@@ -554,211 +554,38 @@ class TestQuantilePerformanceScaler:
|
|
|
554
554
|
|
|
555
555
|
|
|
556
556
|
class TestWeightedQuantilePerformanceScaler:
|
|
557
|
-
"""Tests for weighted quantile scaling."""
|
|
557
|
+
"""Tests for weighted quantile scaling algorithm."""
|
|
558
558
|
|
|
559
|
-
@pytest.
|
|
560
|
-
def
|
|
561
|
-
"""
|
|
559
|
+
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
560
|
+
def test_weighted_mean_alignment(self, df_type):
|
|
561
|
+
"""Weighted scaling should produce weighted mean close to 0.5."""
|
|
562
562
|
np.random.seed(42)
|
|
563
563
|
n = 1000
|
|
564
|
-
|
|
565
|
-
# Create weights (e.g., minutes played)
|
|
566
|
-
weights = np.random.exponential(scale=20, size=n) + 1 # 1 to ~100
|
|
567
|
-
|
|
568
|
-
# High-weight rows have lower zero probability
|
|
569
|
-
# This simulates: players with more minutes are more likely to have non-zero stats
|
|
564
|
+
weights = np.random.exponential(scale=20, size=n) + 1
|
|
570
565
|
values = []
|
|
571
566
|
for w in weights:
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
if np.random.random() < zero_prob:
|
|
575
|
-
values.append(0.0)
|
|
576
|
-
else:
|
|
577
|
-
values.append(np.random.exponential(scale=2))
|
|
578
|
-
|
|
579
|
-
return np.array(values), weights
|
|
580
|
-
|
|
581
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
582
|
-
def test_weighted_mean_alignment(self, df_type, weighted_zero_inflated_data):
|
|
583
|
-
"""Test that weighted mean of scaled values is closer to 0.5 with weighted scaling."""
|
|
584
|
-
values, weights = weighted_zero_inflated_data
|
|
585
|
-
df = df_type({"performance": values, "weight": weights})
|
|
586
|
-
|
|
587
|
-
# Weighted scaler
|
|
588
|
-
weighted_scaler = QuantilePerformanceScaler(
|
|
589
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
590
|
-
)
|
|
591
|
-
weighted_transformed = weighted_scaler.fit_transform(df)
|
|
592
|
-
|
|
593
|
-
if isinstance(weighted_transformed, pd.DataFrame):
|
|
594
|
-
weighted_scaled = weighted_transformed["performance"].values
|
|
595
|
-
else:
|
|
596
|
-
weighted_scaled = weighted_transformed["performance"].to_numpy()
|
|
567
|
+
zero_prob = 0.6 - 0.4 * (w / weights.max())
|
|
568
|
+
values.append(0.0 if np.random.random() < zero_prob else np.random.exponential(scale=2))
|
|
597
569
|
|
|
598
|
-
|
|
599
|
-
|
|
570
|
+
df = df_type({"performance": np.array(values), "weight": weights})
|
|
571
|
+
scaler = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column="weight")
|
|
572
|
+
result = scaler.fit_transform(df)
|
|
600
573
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
)
|
|
574
|
+
scaled = result["performance"].values if isinstance(result, pd.DataFrame) else result["performance"].to_numpy()
|
|
575
|
+
weighted_mean = np.average(scaled, weights=weights)
|
|
576
|
+
assert abs(weighted_mean - 0.5) < 0.02
|
|
605
577
|
|
|
606
578
|
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
607
579
|
def test_backward_compatibility_without_weights(self, df_type):
|
|
608
|
-
"""
|
|
580
|
+
"""weight_column=None should match original unweighted behavior."""
|
|
609
581
|
np.random.seed(42)
|
|
610
|
-
|
|
611
|
-
zeros = np.zeros(200)
|
|
612
|
-
nonzeros = np.random.exponential(scale=2, size=n - 200)
|
|
613
|
-
raw = np.concatenate([zeros, nonzeros])
|
|
582
|
+
raw = np.concatenate([np.zeros(200), np.random.exponential(scale=2, size=300)])
|
|
614
583
|
np.random.shuffle(raw)
|
|
615
|
-
|
|
616
584
|
df = df_type({"performance": raw})
|
|
617
585
|
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
features=["performance"], prefix="", weight_column=None
|
|
621
|
-
)
|
|
622
|
-
unweighted_result = unweighted_scaler.fit_transform(df)
|
|
623
|
-
|
|
624
|
-
# Scaler without weight_column argument
|
|
625
|
-
default_scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
|
|
626
|
-
default_result = default_scaler.fit_transform(df)
|
|
627
|
-
|
|
628
|
-
if isinstance(unweighted_result, pd.DataFrame):
|
|
629
|
-
unweighted_values = unweighted_result["performance"].values
|
|
630
|
-
default_values = default_result["performance"].values
|
|
631
|
-
else:
|
|
632
|
-
unweighted_values = unweighted_result["performance"].to_numpy()
|
|
633
|
-
default_values = default_result["performance"].to_numpy()
|
|
634
|
-
|
|
635
|
-
# Results should be identical
|
|
636
|
-
assert np.allclose(unweighted_values, default_values, atol=1e-10)
|
|
637
|
-
|
|
638
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
639
|
-
def test_equal_weights_matches_unweighted(self, df_type):
|
|
640
|
-
"""Test that all equal weights produce same result as unweighted."""
|
|
641
|
-
np.random.seed(42)
|
|
642
|
-
n = 500
|
|
643
|
-
zeros = np.zeros(200)
|
|
644
|
-
nonzeros = np.random.exponential(scale=2, size=n - 200)
|
|
645
|
-
raw = np.concatenate([zeros, nonzeros])
|
|
646
|
-
np.random.shuffle(raw)
|
|
647
|
-
equal_weights = np.ones(n)
|
|
648
|
-
|
|
649
|
-
df = df_type({"performance": raw, "weight": equal_weights})
|
|
650
|
-
|
|
651
|
-
# Weighted scaler with equal weights
|
|
652
|
-
weighted_scaler = QuantilePerformanceScaler(
|
|
653
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
654
|
-
)
|
|
655
|
-
weighted_result = weighted_scaler.fit_transform(df)
|
|
656
|
-
|
|
657
|
-
# Unweighted scaler
|
|
658
|
-
unweighted_scaler = QuantilePerformanceScaler(
|
|
659
|
-
features=["performance"], prefix="", weight_column=None
|
|
660
|
-
)
|
|
661
|
-
unweighted_result = unweighted_scaler.fit_transform(df)
|
|
662
|
-
|
|
663
|
-
if isinstance(weighted_result, pd.DataFrame):
|
|
664
|
-
weighted_values = weighted_result["performance"].values
|
|
665
|
-
unweighted_values = unweighted_result["performance"].values
|
|
666
|
-
else:
|
|
667
|
-
weighted_values = weighted_result["performance"].to_numpy()
|
|
668
|
-
unweighted_values = unweighted_result["performance"].to_numpy()
|
|
669
|
-
|
|
670
|
-
# Results should be very close (may differ slightly due to algorithm differences)
|
|
671
|
-
assert np.allclose(weighted_values, unweighted_values, atol=0.02)
|
|
672
|
-
|
|
673
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
674
|
-
def test_weights_with_zeros_excluded(self, df_type):
|
|
675
|
-
"""Test that rows with zero weights are excluded from fitting."""
|
|
676
|
-
np.random.seed(42)
|
|
677
|
-
# Create data where zeros have zero weight
|
|
678
|
-
values = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
|
|
679
|
-
weights = np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]) # Zero weight for zeros
|
|
680
|
-
|
|
681
|
-
df = df_type({"performance": values, "weight": weights})
|
|
586
|
+
result1 = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column=None).fit_transform(df)
|
|
587
|
+
result2 = QuantilePerformanceScaler(features=["performance"], prefix="").fit_transform(df)
|
|
682
588
|
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
)
|
|
686
|
-
scaler.fit(df)
|
|
687
|
-
|
|
688
|
-
# Zero proportion should be 0 because zero-weight rows are excluded
|
|
689
|
-
assert scaler._zero_proportion["performance"] == 0.0
|
|
690
|
-
|
|
691
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
692
|
-
def test_weighted_zero_proportion(self, df_type):
|
|
693
|
-
"""Test that zero proportion is computed using weights."""
|
|
694
|
-
# 3 zeros with weight 10 each = 30
|
|
695
|
-
# 7 non-zeros with weight 10 each = 70
|
|
696
|
-
# Weighted zero proportion = 30/100 = 0.3
|
|
697
|
-
values = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
|
|
698
|
-
weights = np.array([10.0] * 10)
|
|
699
|
-
|
|
700
|
-
df = df_type({"performance": values, "weight": weights})
|
|
701
|
-
|
|
702
|
-
scaler = QuantilePerformanceScaler(
|
|
703
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
704
|
-
)
|
|
705
|
-
scaler.fit(df)
|
|
706
|
-
|
|
707
|
-
assert abs(scaler._zero_proportion["performance"] - 0.3) < 1e-10
|
|
708
|
-
|
|
709
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
710
|
-
def test_weighted_zero_proportion_unequal_weights(self, df_type):
|
|
711
|
-
"""Test weighted zero proportion with unequal weights."""
|
|
712
|
-
# 2 zeros with weight 5 each = 10
|
|
713
|
-
# 2 non-zeros with weight 15 each = 30
|
|
714
|
-
# Weighted zero proportion = 10/40 = 0.25
|
|
715
|
-
values = np.array([0.0, 0.0, 1.0, 2.0])
|
|
716
|
-
weights = np.array([5.0, 5.0, 15.0, 15.0])
|
|
717
|
-
|
|
718
|
-
df = df_type({"performance": values, "weight": weights})
|
|
719
|
-
|
|
720
|
-
scaler = QuantilePerformanceScaler(
|
|
721
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
722
|
-
)
|
|
723
|
-
scaler.fit(df)
|
|
724
|
-
|
|
725
|
-
assert abs(scaler._zero_proportion["performance"] - 0.25) < 1e-10
|
|
726
|
-
|
|
727
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
728
|
-
def test_monotonicity_preserved_with_weights(self, df_type, weighted_zero_inflated_data):
|
|
729
|
-
"""Test that monotonicity is preserved with weighted scaling."""
|
|
730
|
-
values, weights = weighted_zero_inflated_data
|
|
731
|
-
df = df_type({"performance": values, "weight": weights})
|
|
732
|
-
|
|
733
|
-
scaler = QuantilePerformanceScaler(
|
|
734
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
735
|
-
)
|
|
736
|
-
transformed = scaler.fit_transform(df)
|
|
737
|
-
|
|
738
|
-
if isinstance(transformed, pd.DataFrame):
|
|
739
|
-
scaled = transformed["performance"].values
|
|
740
|
-
else:
|
|
741
|
-
scaled = transformed["performance"].to_numpy()
|
|
742
|
-
|
|
743
|
-
# Check monotonicity
|
|
744
|
-
order = np.argsort(values)
|
|
745
|
-
sorted_scaled = scaled[order]
|
|
746
|
-
assert np.all(np.diff(sorted_scaled) >= -1e-10)
|
|
747
|
-
|
|
748
|
-
@pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
|
|
749
|
-
def test_bounded_zero_one_with_weights(self, df_type, weighted_zero_inflated_data):
|
|
750
|
-
"""Test that output is bounded [0, 1] with weighted scaling."""
|
|
751
|
-
values, weights = weighted_zero_inflated_data
|
|
752
|
-
df = df_type({"performance": values, "weight": weights})
|
|
753
|
-
|
|
754
|
-
scaler = QuantilePerformanceScaler(
|
|
755
|
-
features=["performance"], prefix="", weight_column="weight"
|
|
756
|
-
)
|
|
757
|
-
transformed = scaler.fit_transform(df)
|
|
758
|
-
|
|
759
|
-
if isinstance(transformed, pd.DataFrame):
|
|
760
|
-
scaled = transformed["performance"].values
|
|
761
|
-
else:
|
|
762
|
-
scaled = transformed["performance"].to_numpy()
|
|
763
|
-
|
|
764
|
-
assert np.all((scaled >= 0) & (scaled <= 1))
|
|
589
|
+
v1 = result1["performance"].values if isinstance(result1, pd.DataFrame) else result1["performance"].to_numpy()
|
|
590
|
+
v2 = result2["performance"].values if isinstance(result2, pd.DataFrame) else result2["performance"].to_numpy()
|
|
591
|
+
assert np.allclose(v1, v2, atol=1e-10)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|