spforge 0.8.37__py3-none-any.whl → 0.8.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,6 +89,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
89
89
  min_value: float = 0.0,
90
90
  max_value: float = 1.0,
91
91
  zero_inflation_threshold: float = 0.15,
92
+ quantile_weight_column: str | None = None,
92
93
  ):
93
94
  self.features = features
94
95
  self.prefix = prefix
@@ -106,6 +107,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
106
107
  self.min_value = min_value
107
108
  self.max_value = max_value
108
109
  self.zero_inflation_threshold = zero_inflation_threshold
110
+ self.quantile_weight_column = quantile_weight_column
109
111
 
110
112
  self.transformers = create_performance_scalers_transformers(
111
113
  transformer_names=self.transformer_names,
@@ -150,6 +152,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
150
152
  QuantilePerformanceScaler(
151
153
  features=prefixed_features,
152
154
  prefix="",
155
+ weight_column=self.quantile_weight_column,
153
156
  )
154
157
  ]
155
158
  break
@@ -214,6 +217,7 @@ class PerformanceWeightsManager(PerformanceManager):
214
217
  prefix: str = "performance__",
215
218
  return_all_features: bool = False,
216
219
  zero_inflation_threshold: float = 0.15,
220
+ quantile_weight_column: str | None = None,
217
221
  ):
218
222
  self.weights = weights
219
223
  self.return_all_features = return_all_features
@@ -227,6 +231,7 @@ class PerformanceWeightsManager(PerformanceManager):
227
231
  min_value=min_value,
228
232
  performance_column=performance_column,
229
233
  zero_inflation_threshold=zero_inflation_threshold,
234
+ quantile_weight_column=quantile_weight_column,
230
235
  )
231
236
 
232
237
  @nw.narwhalify
@@ -432,6 +432,9 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
432
432
  - Non-zeros → uniform on (π, 1) via empirical CDF
433
433
 
434
434
  Fast: O(n log n) for fit, O(n) for transform.
435
+
436
+ If weight_column is provided, weighted quantiles are computed so that
437
+ the scaling respects participation weights (e.g., minutes played).
435
438
  """
436
439
 
437
440
  def __init__(
@@ -440,11 +443,13 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
440
443
  zero_threshold: float = 1e-10,
441
444
  n_quantiles: int = 1000,
442
445
  prefix: str = "",
446
+ weight_column: str | None = None,
443
447
  ):
444
448
  self.features = features
445
449
  self.zero_threshold = zero_threshold
446
450
  self.n_quantiles = n_quantiles
447
451
  self.prefix = prefix
452
+ self.weight_column = weight_column
448
453
  self.features_out = [self.prefix + f for f in self.features]
449
454
 
450
455
  self._zero_proportion: dict[str, float] = {}
@@ -452,21 +457,82 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
452
457
 
453
458
  @nw.narwhalify
454
459
  def fit(self, df: IntoFrameT, y=None):
460
+ # Get weights if specified
461
+ weights = None
462
+ if self.weight_column is not None:
463
+ weights = df[self.weight_column].to_numpy()
464
+
455
465
  for feature in self.features:
456
466
  values = df[feature].to_numpy()
457
- values = values[np.isfinite(values)]
458
467
 
459
- is_zero = np.abs(values) < self.zero_threshold
460
- self._zero_proportion[feature] = np.mean(is_zero)
468
+ # Create finite mask
469
+ finite_mask = np.isfinite(values)
470
+ if weights is not None:
471
+ # Also require finite, positive weights
472
+ weight_valid = np.isfinite(weights) & (weights > 0)
473
+ finite_mask = finite_mask & weight_valid
474
+
475
+ values_finite = values[finite_mask]
476
+
477
+ if weights is not None:
478
+ weights_finite = weights[finite_mask]
479
+ else:
480
+ weights_finite = None
481
+
482
+ is_zero = np.abs(values_finite) < self.zero_threshold
483
+
484
+ if weights_finite is not None:
485
+ # Weighted zero proportion: sum(weights where zero) / sum(weights)
486
+ total_weight = np.sum(weights_finite)
487
+ if total_weight > 0:
488
+ self._zero_proportion[feature] = np.sum(weights_finite[is_zero]) / total_weight
489
+ else:
490
+ self._zero_proportion[feature] = np.mean(is_zero)
491
+ else:
492
+ self._zero_proportion[feature] = np.mean(is_zero)
493
+
494
+ nonzero_mask = ~is_zero
495
+ nonzero_values = values_finite[nonzero_mask]
461
496
 
462
- nonzero_values = values[~is_zero]
463
497
  if len(nonzero_values) > 0:
464
- percentiles = np.linspace(0, 100, self.n_quantiles + 1)
465
- self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
498
+ if weights_finite is not None:
499
+ # Weighted quantiles using interpolation on weighted CDF
500
+ nonzero_weights = weights_finite[nonzero_mask]
501
+ self._nonzero_quantiles[feature] = self._compute_weighted_quantiles(
502
+ nonzero_values, nonzero_weights
503
+ )
504
+ else:
505
+ percentiles = np.linspace(0, 100, self.n_quantiles + 1)
506
+ self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
466
507
  else:
467
508
  self._nonzero_quantiles[feature] = None
468
509
  return self
469
510
 
511
def _compute_weighted_quantiles(
    self, values: np.ndarray, weights: np.ndarray
) -> np.ndarray:
    """Compute weighted quantiles by interpolating on the weighted CDF.

    The values are sorted, the cumulative weight is normalised to end at 1,
    and the value is read off at ``self.n_quantiles + 1`` evenly spaced CDF
    positions between 0 and 1 with linear interpolation. Positions below the
    first CDF point map to the smallest value.

    Args:
        values: One-dimensional array of observations.
        weights: Non-negative weights, same shape as ``values``.

    Returns:
        Array of length ``self.n_quantiles + 1`` with the quantile values.

    Raises:
        ValueError: If the inputs are empty, have different shapes, contain
            a negative weight, or the total weight is not a positive finite
            number.
    """
    values = np.asarray(values)
    weights = np.asarray(weights)

    # Fail loudly instead of raising a bare IndexError or returning NaNs.
    if values.shape != weights.shape:
        raise ValueError(
            f"values and weights must have the same shape, "
            f"got {values.shape} and {weights.shape}"
        )
    if values.size == 0:
        raise ValueError("cannot compute weighted quantiles of empty input")
    if np.any(weights < 0):
        # A negative weight would make the CDF non-monotonic.
        raise ValueError("weights must be non-negative")

    # Sort by value and carry the weights along.
    order = np.argsort(values)
    sorted_values = values[order]
    sorted_weights = weights[order]

    # Weighted CDF, normalised to end at 1.
    cumulative_weights = np.cumsum(sorted_weights)
    total_weight = cumulative_weights[-1]
    if not np.isfinite(total_weight) or total_weight <= 0:
        raise ValueError("total weight must be a positive finite number")
    cdf = cumulative_weights / total_weight

    # Read the value at evenly spaced CDF positions.
    target_cdf = np.linspace(0, 1, self.n_quantiles + 1)
    return np.interp(target_cdf, cdf, sorted_values)
535
+
470
536
  @nw.narwhalify
471
537
  def transform(self, df: IntoFrameT) -> IntoFrameT:
472
538
  for feature in self.features:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.37
3
+ Version: 0.8.38
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -47,8 +47,8 @@ spforge/hyperparameter_tuning/__init__.py,sha256=Vcl8rVlJ7M708iPgqe4XxpZWgJKGux0
47
47
  spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=SjwXLpvYIu_JY8uPRHeL5Kgp1aa0slWDz8qsKDaohWQ,8020
48
48
  spforge/hyperparameter_tuning/_tuner.py,sha256=M79q3saM6r0UZJsRUUgfdDr-3Qii-F2-wuSAZLFtZDo,19246
49
49
  spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAbuKK9M9xHq7IGlQU,482
50
- spforge/performance_transformers/_performance_manager.py,sha256=_qXqj8aaVJyTuUXrZxbOSANwL5iO3Rd1yz9WBwYBTMA,12025
51
- spforge/performance_transformers/_performances_transformers.py,sha256=ZjkFDXoEe5fURpN-dNkrgFXpHEg4aFCWdBDnPyLtgkM,18368
50
+ spforge/performance_transformers/_performance_manager.py,sha256=lh7enqYLd1lXj1VTOiK5N880xkil5q1jRsM51fe_K5g,12322
51
+ spforge/performance_transformers/_performances_transformers.py,sha256=nmjJTEH86JjFneWsnSWIYnUXQoUDskOraDO3VtuufIY,20931
52
52
  spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=Z-VVXWmnzR0O7o2_Q2x2ru_3uiTMpWqKDGQaNBJxCMA,14927
54
54
  spforge/ratings/_player_rating.py,sha256=AIpDEl6cZaC3urcY-jFFgUWd4WZ71A33c5mOPfkXdMs,68178
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.37.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
75
  tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -92,8 +92,8 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
92
92
  tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
93
93
  tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
94
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
95
- tests/performance_transformers/test_performance_manager.py,sha256=Ja2TWq1vGoAqBSSeAWZ26drwEnsF6TmMmpQ0nsMRU_4,19163
96
- tests/performance_transformers/test_performances_transformers.py,sha256=HnW7GKQ6B0ova6Zy0lKbEpA6peZGFE4oi9Th6r7RnG0,18949
95
+ tests/performance_transformers/test_performance_manager.py,sha256=tHyyyjAotJLtrViWx3j0DaNDqp3nowooMu5Wop7DjBE,24766
96
+ tests/performance_transformers/test_performances_transformers.py,sha256=QyLNzis7yOBsjjclhuYrbZFaSmlTcSAbGVzNvK1B-SU,27817
97
97
  tests/ratings/test_player_rating_generator.py,sha256=1Pkx0H8xJMTeLc2Fu9zJcoDpBWiY2zCVSxuBFJk2uEs,110717
98
98
  tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
99
99
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
108
108
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
109
109
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
110
110
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
111
- spforge-0.8.37.dist-info/METADATA,sha256=fLFkSzIsDRPKRpyJ-H126XcKG_NAUyXmJNGDNrogq4s,20048
112
- spforge-0.8.37.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
113
- spforge-0.8.37.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
114
- spforge-0.8.37.dist-info/RECORD,,
111
+ spforge-0.8.38.dist-info/METADATA,sha256=XXk1_WwD1gWvzRk08OSagsR6_w0qJAjcWX57-fwL9rg,20048
112
+ spforge-0.8.38.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
113
+ spforge-0.8.38.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
114
+ spforge-0.8.38.dist-info/RECORD,,
@@ -436,6 +436,146 @@ class TestZeroInflationHandling:
436
436
  assert manager._using_quantile_scaler is True
437
437
 
438
438
 
439
class TestWeightedQuantileScaling:
    """PerformanceManager behaviour when ``quantile_weight_column`` is set."""

    @staticmethod
    def _make_manager(quantile_weight_column):
        """Build a PerformanceManager on feature ``x`` with default transformers."""
        return PerformanceManager(
            features=["x"],
            transformer_names=None,  # defaults, so zero inflation is auto-detected
            prefix="performance__",
            performance_column="perf",
            zero_inflation_threshold=0.15,
            quantile_weight_column=quantile_weight_column,
        )

    @pytest.fixture
    def weighted_zero_inflated_data(self):
        """Return ``(values, weights)`` where heavier rows are zero less often."""
        np.random.seed(42)
        n_rows = 1000

        # Weights play the role of e.g. minutes played.
        minutes = np.random.exponential(scale=20, size=n_rows) + 1
        heaviest = minutes.max()

        # Draw row by row so the random stream is consumed in a fixed order.
        samples = []
        for w in minutes:
            zero_prob = 0.6 - 0.4 * (w / heaviest)
            draw = np.random.random()
            samples.append(0.0 if draw < zero_prob else np.random.exponential(scale=2))

        return np.array(samples), minutes

    @pytest.mark.parametrize("frame", ["pd", "pl"])
    def test_performance_manager_with_weight_column(self, frame, weighted_zero_inflated_data):
        """The weight column is forwarded to the QuantilePerformanceScaler."""
        values, weights = weighted_zero_inflated_data
        df = _make_native_df(frame, {"x": values, "minutes": weights})

        pm = self._make_manager("minutes")
        pm.fit(df)

        # Zero inflation must have triggered the quantile scaler ...
        assert pm._using_quantile_scaler is True
        last = pm.transformers[-1]
        assert isinstance(last, QuantilePerformanceScaler)
        # ... and that scaler must carry the configured weight column.
        assert last.weight_column == "minutes"

    @pytest.mark.parametrize("frame", ["pd", "pl"])
    def test_weighted_scaling_reduces_weighted_bias(self, frame, weighted_zero_inflated_data):
        """Weighted scaling yields a weighted mean nearer 0.5 than unweighted."""
        values, weights = weighted_zero_inflated_data
        df = _make_native_df(frame, {"x": values, "minutes": weights})

        def scaled_output(weight_column):
            # Fit and transform, then pull the scaled column out as NumPy.
            result = self._make_manager(weight_column).fit_transform(df)
            return nw.from_native(result)["performance__perf"].to_numpy()

        scaled_weighted = scaled_output("minutes")
        scaled_unweighted = scaled_output(None)

        weighted_mean_of_weighted = np.average(scaled_weighted, weights=weights)
        weighted_mean_of_unweighted = np.average(scaled_unweighted, weights=weights)

        gap_weighted = abs(weighted_mean_of_weighted - 0.5)
        gap_unweighted = abs(weighted_mean_of_unweighted - 0.5)
        assert gap_weighted < gap_unweighted, (
            f"Weighted mean with weighted scaling ({weighted_mean_of_weighted:.4f}) "
            f"should be closer to 0.5 than without ({weighted_mean_of_unweighted:.4f})"
        )

    @pytest.mark.parametrize("frame", ["pd", "pl"])
    def test_performance_weights_manager_with_quantile_weight_column(
        self, frame, weighted_zero_inflated_data
    ):
        """PerformanceWeightsManager accepts ``quantile_weight_column`` too."""
        from spforge.performance_transformers._performance_manager import ColumnWeight

        values, weights = weighted_zero_inflated_data
        df = _make_native_df(frame, {"feat_a": values, "minutes": weights})

        manager = PerformanceWeightsManager(
            weights=[ColumnWeight(name="feat_a", weight=1.0)],
            transformer_names=None,
            prefix="",
            zero_inflation_threshold=0.15,
            quantile_weight_column="minutes",
        )
        manager.fit(df)

        # Quantile scaler selected, with the weight column attached.
        assert manager._using_quantile_scaler is True
        assert manager.transformers[-1].weight_column == "minutes"

    @pytest.mark.parametrize("frame", ["pd", "pl"])
    def test_weight_column_not_used_when_no_zero_inflation(self, frame):
        """Without zero inflation the quantile scaler is not selected."""
        np.random.seed(42)
        # Normally distributed data has no zero inflation.
        data = np.random.normal(loc=0.5, scale=0.1, size=1000)
        weights = np.random.exponential(scale=20, size=1000) + 1

        df = _make_native_df(frame, {"x": data, "minutes": weights})

        pm = self._make_manager("minutes")
        pm.fit(df)

        assert pm._using_quantile_scaler is False
577
+
578
+
439
579
  class TestAutoScalePerformanceBounds:
440
580
  """Tests for ensuring scaled performance stays within [0, 1] bounds."""
441
581
 
@@ -551,3 +551,214 @@ class TestQuantilePerformanceScaler:
551
551
  # Non-zeros should all map to same value (since they're all equal)
552
552
  nonzero_values = transformed["x"].values[~is_zero.values]
553
553
  assert np.allclose(nonzero_values, nonzero_values[0])
554
+
555
+
556
class TestWeightedQuantilePerformanceScaler:
    """QuantilePerformanceScaler behaviour when a weight column is supplied."""

    @staticmethod
    def _weighted_scaler():
        """Scaler on ``performance`` that reads weights from column ``weight``."""
        return QuantilePerformanceScaler(
            features=["performance"], prefix="", weight_column="weight"
        )

    @staticmethod
    def _performance_array(result):
        """Return the ``performance`` column of a pandas or polars frame as NumPy."""
        column = result["performance"]
        if isinstance(result, pd.DataFrame):
            return column.values
        return column.to_numpy()

    @staticmethod
    def _zero_inflated_sample(n):
        """Seeded sample of length ``n``: 200 zeros plus exponential draws, shuffled."""
        np.random.seed(42)
        zeros = np.zeros(200)
        nonzeros = np.random.exponential(scale=2, size=n - 200)
        raw = np.concatenate([zeros, nonzeros])
        np.random.shuffle(raw)
        return raw

    @pytest.fixture
    def weighted_zero_inflated_data(self):
        """Return ``(values, weights)`` where heavier rows are zero less often."""
        np.random.seed(42)
        n_rows = 1000

        # Weights play the role of e.g. minutes played.
        minutes = np.random.exponential(scale=20, size=n_rows) + 1
        heaviest = minutes.max()

        # Zero probability falls from 0.6 towards 0.2 as the weight grows.
        # Draw row by row so the random stream is consumed in a fixed order.
        samples = []
        for w in minutes:
            zero_prob = 0.6 - 0.4 * (w / heaviest)
            draw = np.random.random()
            samples.append(0.0 if draw < zero_prob else np.random.exponential(scale=2))

        return np.array(samples), minutes

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_weighted_mean_alignment(self, df_type, weighted_zero_inflated_data):
        """The weighted mean of the weighted-scaled output is within 0.02 of 0.5."""
        values, weights = weighted_zero_inflated_data
        df = df_type({"performance": values, "weight": weights})

        weighted_scaled = self._performance_array(self._weighted_scaler().fit_transform(df))
        weighted_mean = np.average(weighted_scaled, weights=weights)

        assert abs(weighted_mean - 0.5) < 0.02, (
            f"Weighted mean should be close to 0.5, got {weighted_mean}"
        )

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_backward_compatibility_without_weights(self, df_type):
        """``weight_column=None`` gives the same output as omitting the argument."""
        df = df_type({"performance": self._zero_inflated_sample(500)})

        # Explicit None first, then the default constructor.
        explicit_none = QuantilePerformanceScaler(
            features=["performance"], prefix="", weight_column=None
        )
        unweighted_values = self._performance_array(explicit_none.fit_transform(df))

        default_scaler = QuantilePerformanceScaler(features=["performance"], prefix="")
        default_values = self._performance_array(default_scaler.fit_transform(df))

        assert np.allclose(unweighted_values, default_values, atol=1e-10)

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_equal_weights_matches_unweighted(self, df_type):
        """Uniform weights give nearly the same scaling as no weights."""
        n = 500
        raw = self._zero_inflated_sample(n)
        equal_weights = np.ones(n)

        df = df_type({"performance": raw, "weight": equal_weights})

        weighted_values = self._performance_array(self._weighted_scaler().fit_transform(df))

        plain_scaler = QuantilePerformanceScaler(
            features=["performance"], prefix="", weight_column=None
        )
        unweighted_values = self._performance_array(plain_scaler.fit_transform(df))

        # Tolerance allows for the two quantile algorithms differing slightly.
        assert np.allclose(weighted_values, unweighted_values, atol=0.02)

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_weights_with_zeros_excluded(self, df_type):
        """Rows whose weight is zero do not contribute to the fit."""
        np.random.seed(42)
        # Every zero value carries zero weight.
        values = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
        weights = np.array([0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0])

        df = df_type({"performance": values, "weight": weights})

        scaler = self._weighted_scaler()
        scaler.fit(df)

        # All zero rows are dropped, so no zero mass is left.
        assert scaler._zero_proportion["performance"] == 0.0

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_weighted_zero_proportion(self, df_type):
        """Zero proportion is a weight share: 3 of 10 rows at weight 10 gives 0.3."""
        values = np.array([0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        weights = np.array([10.0] * 10)

        df = df_type({"performance": values, "weight": weights})

        scaler = self._weighted_scaler()
        scaler.fit(df)

        assert abs(scaler._zero_proportion["performance"] - 0.3) < 1e-10

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_weighted_zero_proportion_unequal_weights(self, df_type):
        """Unequal weights: zero weight 5 + 5 over total 40 gives 0.25."""
        values = np.array([0.0, 0.0, 1.0, 2.0])
        weights = np.array([5.0, 5.0, 15.0, 15.0])

        df = df_type({"performance": values, "weight": weights})

        scaler = self._weighted_scaler()
        scaler.fit(df)

        assert abs(scaler._zero_proportion["performance"] - 0.25) < 1e-10

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_monotonicity_preserved_with_weights(self, df_type, weighted_zero_inflated_data):
        """Larger raw values never map to smaller scaled values."""
        values, weights = weighted_zero_inflated_data
        df = df_type({"performance": values, "weight": weights})

        scaled = self._performance_array(self._weighted_scaler().fit_transform(df))

        # Scaled values, read in raw-value order, must be non-decreasing.
        by_raw_value = scaled[np.argsort(values)]
        assert np.all(np.diff(by_raw_value) >= -1e-10)

    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
    def test_bounded_zero_one_with_weights(self, df_type, weighted_zero_inflated_data):
        """Weighted scaling keeps every output inside [0, 1]."""
        values, weights = weighted_zero_inflated_data
        df = df_type({"performance": values, "weight": weights})

        scaled = self._performance_array(self._weighted_scaler().fit_transform(df))

        assert np.all((scaled >= 0) & (scaled <= 1))