PyPI - spforge - Versions diffs - 0.8.37__py3-none-any.whl → 0.8.39__py3-none-any.whl - Mend

spforge 0.8.37__py3-none-any.whl → 0.8.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of spforge might be problematic. Click here for more details.

Files changed (10)

spforge/performance_transformers/_performance_manager.py CHANGED Viewed

@@ -89,6 +89,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
         min_value: float = 0.0,
         max_value: float = 1.0,
         zero_inflation_threshold: float = 0.15,
+        quantile_weight_column: str | None = None,
     ):
         self.features = features
         self.prefix = prefix
@@ -106,6 +107,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
         self.min_value = min_value
         self.max_value = max_value
         self.zero_inflation_threshold = zero_inflation_threshold
+        self.quantile_weight_column = quantile_weight_column
         self.transformers = create_performance_scalers_transformers(
             transformer_names=self.transformer_names,
@@ -150,6 +152,7 @@ class PerformanceManager(BaseEstimator, TransformerMixin):
                             QuantilePerformanceScaler(
                                 features=prefixed_features,
                                 prefix="",
+                                weight_column=self.quantile_weight_column,
                             )
                         ]
                         break
@@ -214,6 +217,7 @@ class PerformanceWeightsManager(PerformanceManager):
         prefix: str = "performance__",
         return_all_features: bool = False,
         zero_inflation_threshold: float = 0.15,
+        quantile_weight_column: str | None = None,
     ):
         self.weights = weights
         self.return_all_features = return_all_features
@@ -227,6 +231,7 @@ class PerformanceWeightsManager(PerformanceManager):
             min_value=min_value,
             performance_column=performance_column,
             zero_inflation_threshold=zero_inflation_threshold,
+            quantile_weight_column=quantile_weight_column,
         )
     @nw.narwhalify

spforge/performance_transformers/_performances_transformers.py CHANGED Viewed

@@ -432,6 +432,9 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
     - Non-zeros → uniform on (π, 1) via empirical CDF
     Fast: O(n log n) for fit, O(n) for transform.
+    If weight_column is provided, weighted quantiles are computed so that
+    the scaling respects participation weights (e.g., minutes played).
     """
     def __init__(
@@ -440,11 +443,13 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
         zero_threshold: float = 1e-10,
         n_quantiles: int = 1000,
         prefix: str = "",
+        weight_column: str | None = None,
     ):
         self.features = features
         self.zero_threshold = zero_threshold
         self.n_quantiles = n_quantiles
         self.prefix = prefix
+        self.weight_column = weight_column
         self.features_out = [self.prefix + f for f in self.features]
         self._zero_proportion: dict[str, float] = {}
@@ -452,21 +457,82 @@ class QuantilePerformanceScaler(BaseEstimator, TransformerMixin):
     @nw.narwhalify
     def fit(self, df: IntoFrameT, y=None):
+        # Get weights if specified
+        weights = None
+        if self.weight_column is not None:
+            weights = df[self.weight_column].to_numpy()
         for feature in self.features:
             values = df[feature].to_numpy()
-            values = values[np.isfinite(values)]
-            is_zero = np.abs(values) < self.zero_threshold
-            self._zero_proportion[feature] = np.mean(is_zero)
+            # Create finite mask
+            finite_mask = np.isfinite(values)
+            if weights is not None:
+                # Also require finite, positive weights
+                weight_valid = np.isfinite(weights) & (weights > 0)
+                finite_mask = finite_mask & weight_valid
+            values_finite = values[finite_mask]
+            if weights is not None:
+                weights_finite = weights[finite_mask]
+            else:
+                weights_finite = None
+            is_zero = np.abs(values_finite) < self.zero_threshold
+            if weights_finite is not None:
+                # Weighted zero proportion: sum(weights where zero) / sum(weights)
+                total_weight = np.sum(weights_finite)
+                if total_weight > 0:
+                    self._zero_proportion[feature] = np.sum(weights_finite[is_zero]) / total_weight
+                else:
+                    self._zero_proportion[feature] = np.mean(is_zero)
+            else:
+                self._zero_proportion[feature] = np.mean(is_zero)
+            nonzero_mask = ~is_zero
+            nonzero_values = values_finite[nonzero_mask]
-            nonzero_values = values[~is_zero]
             if len(nonzero_values) > 0:
-                percentiles = np.linspace(0, 100, self.n_quantiles + 1)
-                self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
+                if weights_finite is not None:
+                    # Weighted quantiles using interpolation on weighted CDF
+                    nonzero_weights = weights_finite[nonzero_mask]
+                    self._nonzero_quantiles[feature] = self._compute_weighted_quantiles(
+                        nonzero_values, nonzero_weights
+                    )
+                else:
+                    percentiles = np.linspace(0, 100, self.n_quantiles + 1)
+                    self._nonzero_quantiles[feature] = np.percentile(nonzero_values, percentiles)
             else:
                 self._nonzero_quantiles[feature] = None
         return self
+    def _compute_weighted_quantiles(
+        self, values: np.ndarray, weights: np.ndarray
+    ) -> np.ndarray:
+        """Compute weighted quantiles using weighted CDF interpolation."""
+        # Sort by value
+        order = np.argsort(values)
+        sorted_values = values[order]
+        sorted_weights = weights[order]
+        # Compute weighted CDF
+        cumulative_weights = np.cumsum(sorted_weights)
+        total_weight = cumulative_weights[-1]
+        # Normalize CDF to [0, 1]
+        cdf = cumulative_weights / total_weight
+        # Sample quantiles at evenly spaced CDF positions
+        target_cdf = np.linspace(0, 1, self.n_quantiles + 1)
+        # Interpolate to get quantile values
+        # Use np.interp which handles edge cases gracefully
+        quantiles = np.interp(target_cdf, cdf, sorted_values)
+        return quantiles
     @nw.narwhalify
     def transform(self, df: IntoFrameT) -> IntoFrameT:
         for feature in self.features:

spforge/ratings/_base.py CHANGED Viewed

@@ -7,6 +7,7 @@ from abc import abstractmethod
 from typing import Any, Literal
 import narwhals.stable.v2 as nw
+import numpy as np
 import polars as pl
 from narwhals.stable.v2 import DataFrame
 from narwhals.stable.v2.typing import IntoFrameT
@@ -149,6 +150,17 @@ class RatingGenerator(FeatureGenerator):
         if self.performance_manager:
             if self.performance_manager:
+                # Wire in participation weight column for weighted quantile scaling
+                # This ensures zero-inflated distributions use weights for calibration
+                if (
+                    self.column_names
+                    and self.column_names.participation_weight
+                    and self.column_names.participation_weight in df.columns
+                ):
+                    self.performance_manager.quantile_weight_column = (
+                        self.column_names.participation_weight
+                    )
                 ori_perf_values = df[self.performance_manager.ori_performance_column].to_list()
                 df = nw.from_native(self.performance_manager.fit_transform(df))
                 assert (
@@ -165,7 +177,26 @@ class RatingGenerator(FeatureGenerator):
                     "Either transform it manually or set auto_scale_performance to True"
                 )
-            if finite_perf.mean() < 0.42 or finite_perf.mean() > 0.58:
+            # Use weighted mean when weighted quantile scaling is active
+            # because the weighted mean is what's calibrated to 0.5
+            if (
+                self.performance_manager
+                and self.performance_manager._using_quantile_scaler
+                and self.performance_manager.quantile_weight_column
+                and self.performance_manager.quantile_weight_column in df.columns
+            ):
+                weights = df[self.performance_manager.quantile_weight_column]
+                valid_mask = perf.is_finite() & weights.is_finite() & (weights > 0)
+                if valid_mask.sum() > 0:
+                    perf_values = perf.filter(valid_mask).to_numpy()
+                    weight_values = weights.filter(valid_mask).to_numpy()
+                    mean_val = float(np.average(perf_values, weights=weight_values))
+                else:
+                    mean_val = float(finite_perf.mean())
+            else:
+                mean_val = float(finite_perf.mean())
+            if mean_val < 0.42 or mean_val > 0.58:
                 raise ValueError(
                     f"Mean {self.performance_column} must be between 0.42 and 0.58. "
                     "Either transform it manually or set auto_scale_performance to True"

{spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.37
+Version: 0.8.39
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file

{spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/RECORD RENAMED Viewed

@@ -47,10 +47,10 @@ spforge/hyperparameter_tuning/__init__.py,sha256=Vcl8rVlJ7M708iPgqe4XxpZWgJKGux0
 spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=SjwXLpvYIu_JY8uPRHeL5Kgp1aa0slWDz8qsKDaohWQ,8020
 spforge/hyperparameter_tuning/_tuner.py,sha256=M79q3saM6r0UZJsRUUgfdDr-3Qii-F2-wuSAZLFtZDo,19246
 spforge/performance_transformers/__init__.py,sha256=J-5olqi1M_BUj3sN1NqAz9s28XAbuKK9M9xHq7IGlQU,482
-spforge/performance_transformers/_performance_manager.py,sha256=_qXqj8aaVJyTuUXrZxbOSANwL5iO3Rd1yz9WBwYBTMA,12025
-spforge/performance_transformers/_performances_transformers.py,sha256=ZjkFDXoEe5fURpN-dNkrgFXpHEg4aFCWdBDnPyLtgkM,18368
+spforge/performance_transformers/_performance_manager.py,sha256=lh7enqYLd1lXj1VTOiK5N880xkil5q1jRsM51fe_K5g,12322
+spforge/performance_transformers/_performances_transformers.py,sha256=nmjJTEH86JjFneWsnSWIYnUXQoUDskOraDO3VtuufIY,20931
 spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
-spforge/ratings/_base.py,sha256=Z-VVXWmnzR0O7o2_Q2x2ru_3uiTMpWqKDGQaNBJxCMA,14927
+spforge/ratings/_base.py,sha256=Stl_Y2gjQfS1jq_6CfeRG_e3R5Pei34WETdG6CaibGs,16487
 spforge/ratings/_player_rating.py,sha256=AIpDEl6cZaC3urcY-jFFgUWd4WZ71A33c5mOPfkXdMs,68178
 spforge/ratings/_team_rating.py,sha256=3m90-R2zW0k5EHwjw-83Hacz91fGmxW1LQ8ZUGHlgt4,24970
 spforge/ratings/enums.py,sha256=maG0X4WMQeMVAc2wbceq1an-U-z8moZGeG2BAgfICDA,1809
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.37.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+spforge-0.8.39.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -92,8 +92,8 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
 tests/feature_generator/test_rolling_window.py,sha256=_o9oljcAIZ14iI7e8WFeAsfXxILnyqBffit21HOvII4,24378
 tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=usjC2ioO_yWRjjNAlRTyMVYheOrCi0kKocmHQHdTmpM,18699
-tests/performance_transformers/test_performance_manager.py,sha256=Ja2TWq1vGoAqBSSeAWZ26drwEnsF6TmMmpQ0nsMRU_4,19163
-tests/performance_transformers/test_performances_transformers.py,sha256=HnW7GKQ6B0ova6Zy0lKbEpA6peZGFE4oi9Th6r7RnG0,18949
+tests/performance_transformers/test_performance_manager.py,sha256=Ob4s86hdnR_4RC9ZG3lpB5O4Gysr2cLyTmCsO6uWomc,21244
+tests/performance_transformers/test_performances_transformers.py,sha256=2OLpFgBolU8e-1Pga3hiOGWWHhjYpfx8Qrf9YXiqjUw,20919
 tests/ratings/test_player_rating_generator.py,sha256=1Pkx0H8xJMTeLc2Fu9zJcoDpBWiY2zCVSxuBFJk2uEs,110717
 tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
@@ -108,7 +108,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.37.dist-info/METADATA,sha256=fLFkSzIsDRPKRpyJ-H126XcKG_NAUyXmJNGDNrogq4s,20048
-spforge-0.8.37.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-spforge-0.8.37.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
-spforge-0.8.37.dist-info/RECORD,,
+spforge-0.8.39.dist-info/METADATA,sha256=njbTQ33nwPOZ71PhHQDxUWZzP4MjSavx8sT-JgK2fio,20048
+spforge-0.8.39.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.39.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.39.dist-info/RECORD,,

tests/performance_transformers/test_performance_manager.py CHANGED Viewed

@@ -436,6 +436,51 @@ class TestZeroInflationHandling:
         assert manager._using_quantile_scaler is True
+class TestWeightedQuantileScaling:
+    """Test that RatingGenerator wires participation weights to quantile scaling."""
+    def test_rating_generator_wires_weight_column(self):
+        """
+        RatingGenerator should automatically wire participation_weight to
+        quantile_weight_column when using auto_scale_performance with zero-inflated data.
+        """
+        from spforge import ColumnNames
+        from spforge.ratings import PlayerRatingGenerator
+        np.random.seed(42)
+        data = {"player_id": [], "team_id": [], "match_id": [], "start_date": [], "perf": [], "minutes": []}
+        for match_idx in range(50):
+            date = f"2024-{(match_idx // 28) + 1:02d}-{(match_idx % 28) + 1:02d}"
+            for team_idx in range(2):
+                for player_idx in range(5):
+                    minutes = min(np.random.exponential(scale=20) + 5, 48)
+                    # Zero-inflated: high-minutes players more likely non-zero
+                    zero_prob = 0.7 - 0.5 * (minutes / 48)
+                    perf = 0.0 if np.random.random() < zero_prob else np.random.exponential(0.1)
+                    data["player_id"].append(f"P{team_idx}_{player_idx}")
+                    data["team_id"].append(f"T{team_idx}")
+                    data["match_id"].append(f"M{match_idx}")
+                    data["start_date"].append(date)
+                    data["perf"].append(perf)
+                    data["minutes"].append(minutes / 48)
+        cn = ColumnNames(
+            player_id="player_id", team_id="team_id", match_id="match_id",
+            start_date="start_date", update_match_id="match_id", participation_weight="minutes",
+        )
+        gen = PlayerRatingGenerator(performance_column="perf", column_names=cn, auto_scale_performance=True)
+        gen.fit_transform(pl.DataFrame(data))
+        pm = gen.performance_manager
+        if pm._using_quantile_scaler:
+            assert pm.transformers[-1].weight_column == "minutes", (
+                "RatingGenerator should wire quantile_weight_column to participation_weight"
+            )
 class TestAutoScalePerformanceBounds:
     """Tests for ensuring scaled performance stays within [0, 1] bounds."""

tests/performance_transformers/test_performances_transformers.py CHANGED Viewed

@@ -551,3 +551,41 @@ class TestQuantilePerformanceScaler:
         # Non-zeros should all map to same value (since they're all equal)
         nonzero_values = transformed["x"].values[~is_zero.values]
         assert np.allclose(nonzero_values, nonzero_values[0])
+class TestWeightedQuantilePerformanceScaler:
+    """Tests for weighted quantile scaling algorithm."""
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_weighted_mean_alignment(self, df_type):
+        """Weighted scaling should produce weighted mean close to 0.5."""
+        np.random.seed(42)
+        n = 1000
+        weights = np.random.exponential(scale=20, size=n) + 1
+        values = []
+        for w in weights:
+            zero_prob = 0.6 - 0.4 * (w / weights.max())
+            values.append(0.0 if np.random.random() < zero_prob else np.random.exponential(scale=2))
+        df = df_type({"performance": np.array(values), "weight": weights})
+        scaler = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column="weight")
+        result = scaler.fit_transform(df)
+        scaled = result["performance"].values if isinstance(result, pd.DataFrame) else result["performance"].to_numpy()
+        weighted_mean = np.average(scaled, weights=weights)
+        assert abs(weighted_mean - 0.5) < 0.02
+    @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
+    def test_backward_compatibility_without_weights(self, df_type):
+        """weight_column=None should match original unweighted behavior."""
+        np.random.seed(42)
+        raw = np.concatenate([np.zeros(200), np.random.exponential(scale=2, size=300)])
+        np.random.shuffle(raw)
+        df = df_type({"performance": raw})
+        result1 = QuantilePerformanceScaler(features=["performance"], prefix="", weight_column=None).fit_transform(df)
+        result2 = QuantilePerformanceScaler(features=["performance"], prefix="").fit_transform(df)
+        v1 = result1["performance"].values if isinstance(result1, pd.DataFrame) else result1["performance"].to_numpy()
+        v2 = result2["performance"].values if isinstance(result2, pd.DataFrame) else result2["performance"].to_numpy()
+        assert np.allclose(v1, v2, atol=1e-10)

{spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/WHEEL RENAMED Viewed

File without changes

{spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{spforge-0.8.37.dist-info → spforge-0.8.39.dist-info}/top_level.txt RENAMED Viewed

File without changes

spforge 0.8.37__py3-none-any.whl → 0.8.39__py3-none-any.whl

Potentially problematic release.

spforge 0.8.37__py3-none-any.whl → 0.8.39__py3-none-any.whl