spforge: spforge-0.8.37-py3-none-any.whl → spforge-0.8.39-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

@@ -89,6 +89,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
89
89
  min_value: float = 0.0,
90
90
  max_value: float = 1.0,
91
91
  zero_inflation_threshold: float = 0.15,
92
+ quantile_weight_column: str | None = None,
92
93
  ):
93
94
  self.features = features
94
95
  self.prefix = prefix
@@ -106,6 +107,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
106
107
  self.min_value = min_value
107
108
  self.max_value = max_value
108
109
  self.zero_inflation_threshold = zero_inflation_threshold
110
+ self.quantile_weight_column = quantile_weight_column
109
111
 
110
112
  self.transformers = create_performance_scalers_transformers(
111
113
  transformer_names=self.transformer_names,
@@ -150,6 +152,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
150
152
  QuantilePerformanceScaler(
151
153
  features=prefixed_features,
152
154
  prefix="",
155
+ weight_column=self.quantile_weight_column,
153
156
  )
154
157
  ]
155
158
  break
@@ -214,6 +217,7 @@ class PerformanceWeightsManager(PerformanceManager):
214
217
  prefix: str = "performance__",
215
218
  return_all_features: bool = False,
216
219
  zero_inflation_threshold: float = 0.15,
220
+ quantile_weight_column: str | None = None,
217
221
  ):
218
222
  self.weights = weights
219
223
  self.return_all_features = return_all_features
@@ -227,6 +231,7 @@ class PerformanceWeightsManager(PerformanceManager):
227
231
  min_value=min_value,
228
232
  performance_column=performance_column,
229
233
  zero_inflation_threshold=zero_inflation_threshold,
234
+ quantile_weight_column=quantile_weight_column,
230
235
  )
231
236
 
232
237
  @nw.narwhalify
@@ -432,6 +432,9 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
432
432
  - Non-zeros → uniform on (π, 1) via empirical CDF
433
433
 
434
434
  Fast: O(n log n) for fit, O(n) for transform.
435
+
436
+ If weight_column is provided, weighted quantiles are computed so that
437
+ the scaling respects participation weights (e.g., minutes played).
435
438
  """
436
439
 
437
440
  def __init__(
@@ -440,11 +443,13 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
440
443
  zero_threshold: float = 1e-10,
441
444
  n_quantiles: int = 1000,
442
445
  prefix: str = "",
446
+ weight_column: str | None = None,
443
447
  ):
444
448
  self.features = features
445
449
  self.zero_threshold = zero_threshold
446
450
  self.n_quantiles = n_quantiles
447
451
  self.prefix = prefix
452
+ self.weight_column = weight_column
448
453
  self.features_out = [self.prefix + f for f in self.features]
449
454
 
450
455
  self._zero_proportion: dict[str, float] = {}
@@ -452,21 +457,82 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
452
457
 
453
458
  @nw.narwhalify
454
459
  def fit(self, df: IntoFrameT, y=None):
460
+ # Get weights if specified
461
+ weights = None
462
+ if self.weight_column is not None:
463
+ weights = df[self.weight_column].to_numpy()
464
+
455
465
  for feature in self.features:
456
466
  values = df[feature].to_numpy()
457
- values = values[np.isfinite(values)]
458
467
 
459
- is_zero = np.abs(values) < self.zero_threshold
460
- self._zero_proportion[feature] = np.mean(is_zero)
468
+ # Create finite mask
469
+ finite_mask = np.isfinite(values)
470
+ if weights is not None:
471
+ # Also require finite, positive weights
472
+ weight_valid = np.isfinite(weights) & (weights > 0)
473
+ finite_mask = finite_mask & weight_valid
474
+
475
+ values_finite = values[finite_mask]
476
+
477
+ if weights is not None:
478
+ weights_finite = weights[finite_mask]
479
+ else:
480
+ weights_finite = None
481
+
482
+ is_zero = np.abs(values_finite) < self.zero_threshold
483
+
484
+ if weights_finite is not None:
485
+ # Weighted zero proportion: sum(weights where zero) / sum(weights)
486
+ total_weight = np.sum(weights_finite)
487
+ if total_weight > 0:
488
+ self._zero_proportion[feature] = np.sum(weights_finite[is_zero]) / total_weight
489
+ else:
490
+ self._zero_proportion[feature] = np.mean(is_zero)
491
+ else:
492
+ self._zero_proportion[feature] = np.mean(is_zero)
493
+
494
+ nonzero_mask = ~is_zero
495
+ nonzero_values = values_finite[nonzero_mask]
461
496
 
462
- nonzero_values = values[~is_zero]
463
497
  if len(nonzero_values) > 0:
464
- percentiles = np.linspace(0, 100, self.n_quantiles + 1)
465
- self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
498
+ if weights_finite is not None:
499
+ # Weighted quantiles using interpolation on weighted CDF
500
+ nonzero_weights = weights_finite[nonzero_mask]
501
+ self._nonzero_quantiles[feature] = self._compute_weighted_quantiles(
502
+ nonzero_values, nonzero_weights
503
+ )
504
+ else:
505
+ percentiles = np.linspace(0, 100, self.n_quantiles + 1)
506
+ self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
466
507
  else:
467
508
  self._nonzero_quantiles[feature] = None
468
509
  return self
469
510
 
511
+ def _compute_weighted_quantiles(
512
+ self, values: np.ndarray, weights: np.ndarray
513
+ ) -> np.ndarray:
514
+ """Compute weighted quantiles using weighted CDF interpolation."""
515
+ # Sort by value
516
+ order = np.argsort(values)
517
+ sorted_values = values[order]
518
+ sorted_weights = weights[order]
519
+
520
+ # Compute weighted CDF
521
+ cumulative_weights = np.cumsum(sorted_weights)
522
+ total_weight = cumulative_weights[-1]
523
+
524
+ # Normalize CDF to [0, 1]
525
+ cdf = cumulative_weights / total_weight
526
+
527
+ # Sample quantiles at evenly spaced CDF positions
528
+ target_cdf = np.linspace(0, 1, self.n_quantiles + 1)
529
+
530
+ # Interpolate to get quantile values
531
+ # Use np.interp which handles edge cases gracefully
532
+ quantiles = np.interp(target_cdf, cdf, sorted_values)
533
+
534
+ return quantiles
535
+
470
536
  @nw.narwhalify
471
537
  def transform(self, df: IntoFrameT) -> IntoFrameT:
472
538
  for feature in self.features:
spforge/ratings/_base.py CHANGED
@@ -7,6 +7,7 @@ from abc import abstractmethod
7
7
  from typing import Any, Literal
8
8
 
9
9
  import narwhals.stable.v2 as nw
10
+ import numpy as np
10
11
  import polars as pl
11
12
  from narwhals.stable.v2 import DataFrame
12
13
  from narwhals.stable.v2.typing import IntoFrameT
@@ -149,6 +150,17 @@ class RatingGenerator(FeatureGenerator):
149
150
 
150
151
  if self.performance_manager:
151
152
  if self.performance_manager:
153
+ # Wire in participation weight column for weighted quantile scaling
154
+ # This ensures zero-inflated distributions use weights for calibration
155
+ if (
156
+ self.column_names
157
+ and self.column_names.participation_weight
158
+ and self.column_names.participation_weight in df.columns
159
+ ):
160
+ self.performance_manager.quantile_weight_column = (
161
+ self.column_names.participation_weight
162
+ )
163
+
152
164
  ori_perf_values = df[self.performance_manager.ori_performance_column].to_list()
153
165
  df = nw.from_native(self.performance_manager.fit_transform(df))
154
166
  assert (
@@ -165,7 +177,26 @@ class RatingGenerator(FeatureGenerator):
165
177
  "Either transform it manually or set auto_scale_performance to True"
166
178
  )
167
179
 
168
- if finite_perf.mean() < 0.42 or finite_perf.mean() > 0.58:
180
+ # Use weighted mean when weighted quantile scaling is active
181
+ # because the weighted mean is what's calibrated to 0.5
182
+ if (
183
+ self.performance_manager
184
+ and self.performance_manager._using_quantile_scaler
185
+ and self.performance_manager.quantile_weight_column
186
+ and self.performance_manager.quantile_weight_column in df.columns
187
+ ):
188
+ weights = df[self.performance_manager.quantile_weight_column]
189
+ valid_mask = perf.is_finite() & weights.is_finite() & (weights > 0)
190
+ if valid_mask.sum() > 0:
191
+ perf_values = perf.filter(valid_mask).to_numpy()
192
+ weight_values = weights.filter(valid_mask).to_numpy()
193
+ mean_val = float(np.average(perf_values, weights=weight_values))
194
+ else:
195
+ mean_val = float(finite_perf.mean())
196
+ else:
197
+ mean_val = float(finite_perf.mean())
198
+
199
+ if mean_val < 0.42 or mean_val > 0.58:
169
200
  raise ValueError(
170
201
  f"Mean {self.performance_column} must be between 0.42 and 0.58. "
171
202
  "Either transform it manually or set auto_scale_performance to True"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.37
3
+ Version: 0.8.39
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -47,10 +47,10 @@ spforge/hyperparameter_tuning/__init__.py,sha256=Vcl8rVlJ7M708iPgqe4XxpZWgJKGux0
47
47
  spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=SjwXLpvYIu_JY8uPRHeL5Kgp1aa0slWDz8qsKDaohWQ,8020
48
48
  spforge/hyperparameter_tuning/_tuner.py,sha256=M79q3saM6r0UZJsRUUgfdDr-3Qii-F2-wuSAZLFtZDo,19246
49
49
  spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAbuKK9M9xHq7IGlQU,482
50
- spforge/performance_transformers/_performance_manager.py,sha256=_qXqj8aaVJyTuUXrZxbOSANwL5iO3Rd1yz9WBwYBTMA,12025
51
- spforge/performance_transformers/_performances_transformers.py,sha256=ZjkFDXoEe5fURpN-dNkrgFXpHEg4aFCWdBDnPyLtgkM,18368
50
+ spforge/performance_transformers/_performance_manager.py,sha256=lh7enqYLd1lXj1VTOiK5N880xkil5q1jRsM51fe_K5g,12322
51
+ spforge/performance_transformers/_performances_transformers.py,sha256=nmjJTEH86JjFneWsnSWIYnUXQoUDskOraDO3VtuufIY,20931
52
52
  spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
- spforge/ratings/_base.py,sha256=Z-VVXWmnzR0O7o2_Q2x2ru_3uiTMpWqKDGQaNBJxCMA,14927
53
+ spforge/ratings/_base.py,sha256=Stl_Y2gjQfS1jq_6CfeRG_e3R5Pei34WETdG6CaibGs,16487
54
54
  spforge/ratings/_player_rating.py,sha256=AIpDEl6cZaC3urcY-jFFgUWd4WZ71A33c5mOPfkXdMs,68178
55
55
  spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
56
56
  spforge/ratings/enums.py,sha256=maG0X4WMQeMVAc2wbceq1an-U-z8moZGeG2BAgfICDA,1809
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.37.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.39.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
75
  tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -92,8 +92,8 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
92
92
  tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
93
93
  tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
94
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
95
- tests/performance_transformers/test_performance_manager.py,sha256=Ja2TWq1vGoAqBSSeAWZ26drwEnsF6TmMmpQ0nsMRU_4,19163
96
- tests/performance_transformers/test_performances_transformers.py,sha256=HnW7GKQ6B0ova6Zy0lKbEpA6peZGFE4oi9Th6r7RnG0,18949
95
+ tests/performance_transformers/test_performance_manager.py,sha256=Ob4s86hdnR_4RC9ZG3lpB5O4Gysr2cLyTmCsO6uWomc,21244
96
+ tests/performance_transformers/test_performances_transformers.py,sha256=2OLpFgBolU8e-1Pga3hiOGWWHhjYpfx8Qrf9YXiqjUw,20919
97
97
  tests/ratings/test_player_rating_generator.py,sha256=1Pkx0H8xJMTeLc2Fu9zJcoDpBWiY2zCVSxuBFJk2uEs,110717
98
98
  tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
99
99
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
108
108
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
109
109
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
110
110
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
111
- spforge-0.8.37.dist-info/METADATA,sha256=fLFkSzIsDRPKRpyJ-H126XcKG_NAUyXmJNGDNrogq4s,20048
112
- spforge-0.8.37.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
113
- spforge-0.8.37.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
114
- spforge-0.8.37.dist-info/RECORD,,
111
+ spforge-0.8.39.dist-info/METADATA,sha256=njbTQ33nwPOZ71PhHQDxUWZzP4MjSavx8sT-JgK2fio,20048
112
+ spforge-0.8.39.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
113
+ spforge-0.8.39.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
114
+ spforge-0.8.39.dist-info/RECORD,,
@@ -436,6 +436,51 @@ class TestZeroInflationHandling:
436
436
  assert manager._using_quantile_scaler is True
437
437
 
438
438
 
439
+ class TestWeightedQuantileScaling:
440
+ """Test that RatingGenerator wires participation weights to quantile scaling."""
441
+
442
+ def test_rating_generator_wires_weight_column(self):
443
+ """
444
+ RatingGenerator should automatically wire participation_weight to
445
+ quantile_weight_column when using auto_scale_performance with zero-inflated data.
446
+ """
447
+ from spforge import ColumnNames
448
+ from spforge.ratings import PlayerRatingGenerator
449
+
450
+ np.random.seed(42)
451
+ data = {"player_id": [], "team_id": [], "match_id": [], "start_date": [], "perf": [], "minutes": []}
452
+
453
+ for match_idx in range(50):
454
+ date = f"2024-{(match_idx // 28) + 1:02d}-{(match_idx % 28) + 1:02d}"
455
+ for team_idx in range(2):
456
+ for player_idx in range(5):
457
+ minutes = min(np.random.exponential(scale=20) + 5, 48)
458
+ # Zero-inflated: high-minutes players more likely non-zero
459
+ zero_prob = 0.7 - 0.5 * (minutes / 48)
460
+ perf = 0.0 if np.random.random() < zero_prob else np.random.exponential(0.1)
461
+
462
+ data["player_id"].append(f"P{team_idx}_{player_idx}")
463
+ data["team_id"].append(f"T{team_idx}")
464
+ data["match_id"].append(f"M{match_idx}")
465
+ data["start_date"].append(date)
466
+ data["perf"].append(perf)
467
+ data["minutes"].append(minutes / 48)
468
+
469
+ cn = ColumnNames(
470
+ player_id="player_id", team_id="team_id", match_id="match_id",
471
+ start_date="start_date", update_match_id="match_id", participation_weight="minutes",
472
+ )
473
+
474
+ gen = PlayerRatingGenerator(performance_column="perf", column_names=cn, auto_scale_performance=True)
475
+ gen.fit_transform(pl.DataFrame(data))
476
+
477
+ pm = gen.performance_manager
478
+ if pm._using_quantile_scaler:
479
+ assert pm.transformers[-1].weight_column == "minutes", (
480
+ "RatingGenerator should wire quantile_weight_column to participation_weight"
481
+ )
482
+
483
+
439
484
  class TestAutoScalePerformanceBounds:
440
485
  """Tests for ensuring scaled performance stays within [0, 1] bounds."""
441
486
 
@@ -551,3 +551,41 @@ class TestQuantilePerformanceScaler:
551
551
  # Non-zeros should all map to same value (since they're all equal)
552
552
  nonzero_values = transformed["x"].values[~is_zero.values]
553
553
  assert np.allclose(nonzero_values, nonzero_values[0])
554
+
555
+
556
+ class TestWeightedQuantilePerformanceScaler:
557
+ """Tests for weighted quantile scaling algorithm."""
558
+
559
+ @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
560
+ def test_weighted_mean_alignment(self, df_type):
561
+ """Weighted scaling should produce weighted mean close to 0.5."""
562
+ np.random.seed(42)
563
+ n = 1000
564
+ weights = np.random.exponential(scale=20, size=n) + 1
565
+ values = []
566
+ for w in weights:
567
+ zero_prob = 0.6 - 0.4 * (w / weights.max())
568
+ values.append(0.0 if np.random.random() < zero_prob else np.random.exponential(scale=2))
569
+
570
+ df = df_type({"performance": np.array(values), "weight": weights})
571
+ scaler = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column="weight")
572
+ result = scaler.fit_transform(df)
573
+
574
+ scaled = result["performance"].values if isinstance(result, pd.DataFrame) else result["performance"].to_numpy()
575
+ weighted_mean = np.average(scaled, weights=weights)
576
+ assert abs(weighted_mean - 0.5) < 0.02
577
+
578
+ @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
579
+ def test_backward_compatibility_without_weights(self, df_type):
580
+ """weight_column=None should match original unweighted behavior."""
581
+ np.random.seed(42)
582
+ raw = np.concatenate([np.zeros(200), np.random.exponential(scale=2, size=300)])
583
+ np.random.shuffle(raw)
584
+ df = df_type({"performance": raw})
585
+
586
+ result1 = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column=None).fit_transform(df)
587
+ result2 = QuantilePerformanceScaler(features=["performance"], prefix="").fit_transform(df)
588
+
589
+ v1 = result1["performance"].values if isinstance(result1, pd.DataFrame) else result1["performance"].to_numpy()
590
+ v2 = result2["performance"].values if isinstance(result2, pd.DataFrame) else result2["performance"].to_numpy()
591
+ assert np.allclose(v1, v2, atol=1e-10)