spforge 0.8.16__py3-none-any.whl → 0.8.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
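
In functional terms, the 0.8.17 change is confined to the PWMSE scorer in spforge/scorer/_score.py: the private _slice_and_renormalize helper is replaced by _align_predictions, which still slices and renormalizes when evaluation_labels is a subset of labels, but now also handles the superset case by padding labels the model never predicts with a small floor probability (1e-5) and renormalizing each row. Partial overlap between the two label lists now raises a ValueError. The remaining hunks are the version bump and the corresponding hash updates in METADATA and RECORD.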

spforge/scorer/_score.py CHANGED
@@ -366,18 +366,49 @@ class PWMSE(BaseScorer):
         self.labels = labels
         self.evaluation_labels = evaluation_labels
 
+        self._needs_extension = False
+        self._needs_slicing = False
         self._eval_indices: list[int] | None = None
+        self._extension_mapping: dict[int, int] | None = None
+
         if self.evaluation_labels is not None and self.labels is not None:
-            label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
-            self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
+            training_set = set(self.labels)
+            eval_set = set(self.evaluation_labels)
+
+            if eval_set <= training_set:
+                self._needs_slicing = True
+                label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
+                self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
+            elif training_set <= eval_set:
+                self._needs_extension = True
+                eval_label_to_idx = {lbl: i for i, lbl in enumerate(self.evaluation_labels)}
+                self._extension_mapping = {
+                    train_idx: eval_label_to_idx[lbl]
+                    for train_idx, lbl in enumerate(self.labels)
+                }
+            else:
+                raise ValueError(
+                    f"evaluation_labels must be a subset or superset of labels. "
+                    f"labels={self.labels}, evaluation_labels={self.evaluation_labels}"
+                )
+
+    def _align_predictions(self, preds: np.ndarray) -> np.ndarray:
+        if self._needs_slicing and self._eval_indices is not None:
+            sliced = preds[:, self._eval_indices]
+            row_sums = sliced.sum(axis=1, keepdims=True)
+            row_sums = np.where(row_sums == 0, 1.0, row_sums)
+            return sliced / row_sums
+
+        if self._needs_extension and self._extension_mapping is not None:
+            n_samples = preds.shape[0]
+            n_eval_labels = len(self.evaluation_labels)
+            extended = np.full((n_samples, n_eval_labels), 1e-5, dtype=np.float64)
+            for train_idx, eval_idx in self._extension_mapping.items():
+                extended[:, eval_idx] = preds[:, train_idx]
+            row_sums = extended.sum(axis=1, keepdims=True)
+            return extended / row_sums
 
-    def _slice_and_renormalize(self, preds: np.ndarray) -> np.ndarray:
-        if self._eval_indices is None:
-            return preds
-        sliced = preds[:, self._eval_indices]
-        row_sums = sliced.sum(axis=1, keepdims=True)
-        row_sums = np.where(row_sums == 0, 1.0, row_sums)
-        return sliced / row_sums
+        return preds
 
     def _get_scoring_labels(self) -> list[int]:
         if self.evaluation_labels is not None:
@@ -446,7 +477,7 @@ class PWMSE(BaseScorer):
 
         targets = gran_df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self._slice_and_renormalize(preds)
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(
@@ -464,7 +495,7 @@ class PWMSE(BaseScorer):
 
         targets = df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
-        preds = self._slice_and_renormalize(preds)
+        preds = self._align_predictions(preds)
         score = self._pwmse_score(targets, preds)
         if self.compare_to_naive:
             naive_probs_list = _naive_probability_predictions_for_df(
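
For readers skimming the diff, here is a minimal standalone sketch of the new extension path, using plain numpy and the same 3-label to 5-label shapes the tests below exercise. The variable names here are illustrative, not part of the spforge API.

import numpy as np

labels = [0, 1, 2]                    # columns the model actually predicts
evaluation_labels = [-1, 0, 1, 2, 3]  # superset to score against

preds = np.array([[0.3, 0.5, 0.2],
                  [0.2, 0.6, 0.2]])

# Place each trained column at its position among the evaluation labels;
# labels the model never predicts get a small floor probability (1e-5).
eval_label_to_idx = {lbl: i for i, lbl in enumerate(evaluation_labels)}
extended = np.full((preds.shape[0], len(evaluation_labels)), 1e-5)
for train_idx, lbl in enumerate(labels):
    extended[:, eval_label_to_idx[lbl]] = preds[:, train_idx]

# Renormalize so each row is a probability distribution again.
extended /= extended.sum(axis=1, keepdims=True)
print(extended[0])  # ~ [1e-5, 0.3, 0.5, 0.2, 1e-5] / 1.00002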
spforge-0.8.17.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.16
+Version: 0.8.17
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
spforge-0.8.17.dist-info/RECORD CHANGED
@@ -62,7 +62,7 @@ spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH
 spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
 spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
-spforge/scorer/_score.py,sha256=TR0T9nJj0aeVgGfOE0fZmXlO66CELulYwxhi7ZAxhvY,56184
+spforge/scorer/_score.py,sha256=kNuqiK3F5mUEAVD7KjWYY7E_AkRrspR362QBm_jyElg,57623
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
 spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
 spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -71,7 +71,7 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+spforge-0.8.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=g5SMTTolfRikHZfwIkExuoRjh-ldcr9-F-E1PUNpbpM,22923
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
@@ -99,7 +99,7 @@ tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcT
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
 tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
-tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
+tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
 tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -107,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.16.dist-info/METADATA,sha256=POeA0zCWQgYcTHLIvJwlqx8TAM4bL5ec_uTHTR5WoHA,20048
-spforge-0.8.16.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-spforge-0.8.16.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
-spforge-0.8.16.dist-info/RECORD,,
+spforge-0.8.17.dist-info/METADATA,sha256=Zc4fLlCtPWuEFvs0DVRZre9OtTvRyVMgdmGV7-s68Ao,20048
+spforge-0.8.17.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+spforge-0.8.17.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.17.dist-info/RECORD,,
tests/scorer/test_score.py CHANGED
@@ -2138,3 +2138,145 @@ def test_scorers_respect_validation_column(scorer_factory, df_factory):
     score_all = scorer_factory().score(df)
     score_valid = scorer_factory().score(df_valid)
     assert score_all == score_valid
+
+
+# ============================================================================
+# PWMSE evaluation_labels Extension Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_extends_predictions(df_type):
+    """PWMSE with evaluation_labels as superset extends predictions with small probs."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.3, 0.5, 0.2],
+                [0.2, 0.6, 0.2],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[-1, 0, 1, 2, 3],
+    )
+    score = scorer.score(df)
+
+    n_eval_labels = 5
+    eps = 1e-5
+    preds_original = np.array([[0.3, 0.5, 0.2], [0.2, 0.6, 0.2]])
+    extended = np.full((2, n_eval_labels), eps, dtype=np.float64)
+    extended[:, 1] = preds_original[:, 0]
+    extended[:, 2] = preds_original[:, 1]
+    extended[:, 3] = preds_original[:, 2]
+    row_sums = extended.sum(axis=1, keepdims=True)
+    preds_renorm = extended / row_sums
+
+    eval_labels = np.array([-1, 0, 1, 2, 3], dtype=np.float64)
+    targets = np.array([0, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_exact_match(df_type):
+    """PWMSE with evaluation_labels identical to labels (no-op)."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.3, 0.5, 0.2],
+                [0.2, 0.6, 0.2],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    scorer_with_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[0, 1, 2],
+    )
+    scorer_without_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+    )
+
+    score_with = scorer_with_eval.score(df)
+    score_without = scorer_without_eval.score(df)
+
+    assert abs(score_with - score_without) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_partial_overlap_raises(df_type):
+    """PWMSE with partial overlap between labels and evaluation_labels raises."""
+    with pytest.raises(ValueError, match="evaluation_labels must be a subset or superset"):
+        PWMSE(
+            pred_column="pred",
+            target="target",
+            labels=[0, 1, 2],
+            evaluation_labels=[1, 2, 3],
+        )
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_extends_with_compare_to_naive(df_type):
+    """PWMSE extension mode works correctly with compare_to_naive."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.8, 0.15, 0.05],
+                [0.1, 0.7, 0.2],
+                [0.05, 0.15, 0.8],
+                [0.3, 0.4, 0.3],
+            ],
+            "target": [0, 1, 2, 1],
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[0, 1, 2],
+        evaluation_labels=[-1, 0, 1, 2, 3],
+        compare_to_naive=True,
+    )
+    score = scorer.score(df)
+
+    n_eval_labels = 5
+    eps = 1e-5
+    preds_original = np.array([
+        [0.8, 0.15, 0.05],
+        [0.1, 0.7, 0.2],
+        [0.05, 0.15, 0.8],
+        [0.3, 0.4, 0.3],
+    ])
+    extended = np.full((4, n_eval_labels), eps, dtype=np.float64)
+    extended[:, 1] = preds_original[:, 0]
+    extended[:, 2] = preds_original[:, 1]
+    extended[:, 3] = preds_original[:, 2]
+    row_sums = extended.sum(axis=1, keepdims=True)
+    preds_renorm = extended / row_sums
+
+    eval_labels = np.array([-1, 0, 1, 2, 3], dtype=np.float64)
+    targets = np.array([0, 1, 2, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    model_score = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    naive_probs = np.array([0.0, 0.25, 0.5, 0.25, 0.0])
+    naive_preds = np.tile(naive_probs, (4, 1))
+    naive_score = float((diffs_sqd * naive_preds).sum(axis=1).mean())
+
+    expected = naive_score - model_score
+    assert abs(score - expected) < 1e-10
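
As the expected-value computations in these tests show, PWMSE scores each row as the probability-weighted squared distance between the evaluation labels and the target, sum_k p_k * (label_k - target)^2, averaged over rows; with compare_to_naive=True the reported score is naive_score - model_score, so a larger value means the model beats the naive label-frequency baseline. This reading is inferred from the test arithmetic above, not from separate documentation.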