spforge 0.8.8__py3-none-any.whl → 0.8.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

spforge/ratings/utils.py CHANGED
@@ -2,6 +2,10 @@ import polars as pl
2
2
 
3
3
  from spforge.data_structures import ColumnNames
4
4
 
5
+ # Internal column names for scaled participation weights
6
+ _SCALED_PW = "__scaled_participation_weight__"
7
+ _SCALED_PPW = "__scaled_projected_participation_weight__"
8
+
5
9
 
6
10
  def add_team_rating(
7
11
  df: pl.DataFrame,
@@ -46,11 +50,14 @@ def add_team_rating_projected(
46
50
  tid = column_names.team_id
47
51
  ppw = column_names.projected_participation_weight
48
52
 
49
- if ppw:
53
+ # Use scaled column if available (clipped to [0, 1]), otherwise raw column
54
+ weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
55
+
56
+ if weight_col and weight_col in df.columns:
50
57
  return df.with_columns(
51
58
  (
52
- (pl.col(ppw) * pl.col(player_rating_col)).sum().over([mid, tid])
53
- / pl.col(ppw).sum().over([mid, tid])
59
+ (pl.col(weight_col) * pl.col(player_rating_col)).sum().over([mid, tid])
60
+ / pl.col(weight_col).sum().over([mid, tid])
54
61
  ).alias(team_rating_out)
55
62
  )
56
63
 
@@ -118,11 +125,14 @@ def add_rating_mean_projected(
118
125
  mid = column_names.match_id
119
126
  ppw = column_names.projected_participation_weight
120
127
 
121
- if ppw:
128
+ # Use scaled column if available (clipped to [0, 1]), otherwise raw column
129
+ weight_col = _SCALED_PPW if _SCALED_PPW in df.columns else ppw
130
+
131
+ if weight_col and weight_col in df.columns:
122
132
  return df.with_columns(
123
133
  (
124
- (pl.col(ppw) * pl.col(player_rating_col)).sum().over(mid)
125
- / pl.col(ppw).sum().over(mid)
134
+ (pl.col(weight_col) * pl.col(player_rating_col)).sum().over(mid)
135
+ / pl.col(weight_col).sum().over(mid)
126
136
  ).alias(rating_mean_out)
127
137
  )
128
138
 
spforge/scorer/_score.py CHANGED
@@ -366,18 +366,49 @@ class PWMSE(BaseScorer):
366
366
  self.labels = labels
367
367
  self.evaluation_labels = evaluation_labels
368
368
 
369
+ self._needs_extension = False
370
+ self._needs_slicing = False
369
371
  self._eval_indices: list[int] | None = None
372
+ self._extension_mapping: dict[int, int] | None = None
373
+
370
374
  if self.evaluation_labels is not None and self.labels is not None:
371
- label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
372
- self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
375
+ training_set = set(self.labels)
376
+ eval_set = set(self.evaluation_labels)
377
+
378
+ if eval_set <= training_set:
379
+ self._needs_slicing = True
380
+ label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
381
+ self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
382
+ elif training_set <= eval_set:
383
+ self._needs_extension = True
384
+ eval_label_to_idx = {lbl: i for i, lbl in enumerate(self.evaluation_labels)}
385
+ self._extension_mapping = {
386
+ train_idx: eval_label_to_idx[lbl]
387
+ for train_idx, lbl in enumerate(self.labels)
388
+ }
389
+ else:
390
+ raise ValueError(
391
+ f"evaluation_labels must be a subset or superset of labels. "
392
+ f"labels={self.labels}, evaluation_labels={self.evaluation_labels}"
393
+ )
394
+
395
+ def _align_predictions(self, preds: np.ndarray) -> np.ndarray:
396
+ if self._needs_slicing and self._eval_indices is not None:
397
+ sliced = preds[:, self._eval_indices]
398
+ row_sums = sliced.sum(axis=1, keepdims=True)
399
+ row_sums = np.where(row_sums == 0, 1.0, row_sums)
400
+ return sliced / row_sums
401
+
402
+ if self._needs_extension and self._extension_mapping is not None:
403
+ n_samples = preds.shape[0]
404
+ n_eval_labels = len(self.evaluation_labels)
405
+ extended = np.full((n_samples, n_eval_labels), 1e-5, dtype=np.float64)
406
+ for train_idx, eval_idx in self._extension_mapping.items():
407
+ extended[:, eval_idx] = preds[:, train_idx]
408
+ row_sums = extended.sum(axis=1, keepdims=True)
409
+ return extended / row_sums
373
410
 
374
- def _slice_and_renormalize(self, preds: np.ndarray) -> np.ndarray:
375
- if self._eval_indices is None:
376
- return preds
377
- sliced = preds[:, self._eval_indices]
378
- row_sums = sliced.sum(axis=1, keepdims=True)
379
- row_sums = np.where(row_sums == 0, 1.0, row_sums)
380
- return sliced / row_sums
411
+ return preds
381
412
 
382
413
  def _get_scoring_labels(self) -> list[int]:
383
414
  if self.evaluation_labels is not None:
@@ -446,7 +477,7 @@ class PWMSE(BaseScorer):
446
477
 
447
478
  targets = gran_df[self.target].to_numpy().astype(np.float64)
448
479
  preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
449
- preds = self._slice_and_renormalize(preds)
480
+ preds = self._align_predictions(preds)
450
481
  score = self._pwmse_score(targets, preds)
451
482
  if self.compare_to_naive:
452
483
  naive_probs_list = _naive_probability_predictions_for_df(
@@ -464,7 +495,7 @@ class PWMSE(BaseScorer):
464
495
 
465
496
  targets = df[self.target].to_numpy().astype(np.float64)
466
497
  preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
467
- preds = self._slice_and_renormalize(preds)
498
+ preds = self._align_predictions(preds)
468
499
  score = self._pwmse_score(targets, preds)
469
500
  if self.compare_to_naive:
470
501
  naive_probs_list = _naive_probability_predictions_for_df(
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
8
8
 
9
9
 
10
10
  class GroupByReducer(BaseEstimator, TransformerMixin):
11
- def __init__(self, granularity: list[str]):
11
+ def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
12
12
  self.granularity = granularity
13
+ self.aggregation_weight = aggregation_weight
13
14
 
14
15
  @nw.narwhalify
15
16
  def fit(self, X: IntoFrameT, y: Any = None):
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
26
27
  raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
27
28
 
28
29
  non_keys = [c for c in df.columns if c not in keys]
29
- num_cols = [c for c in non_keys if pd.api.types.is_numeric_dtype(df[c])]
30
+ schema = df.schema
31
+ num_cols = [c for c in non_keys if schema[c].is_numeric()]
30
32
  other_cols = [c for c in non_keys if c not in num_cols]
31
33
 
32
34
  aggs: list[nw.Expr] = []
33
35
 
36
+ # Backwards compatibility: old pickled objects may not have aggregation_weight
37
+ weight_col = getattr(self, "aggregation_weight", None)
38
+ has_weight = weight_col and weight_col in df.columns
39
+
34
40
  for c in num_cols:
35
- aggs.append(nw.col(c).mean().alias(c))
41
+ if c == weight_col:
42
+ aggs.append(nw.col(c).sum().alias(c))
43
+ elif has_weight:
44
+ aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
45
+ aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
46
+ else:
47
+ aggs.append(nw.col(c).mean().alias(c))
36
48
 
37
49
  for c in other_cols:
38
50
  aggs.append(nw.col(c).first().alias(c))
39
51
 
52
+ if has_weight:
53
+ aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
54
+
40
55
  out = df.group_by(keys).agg(aggs)
56
+
57
+ if has_weight:
58
+ weighted_cols = [c for c in num_cols if c != weight_col]
59
+ for c in weighted_cols:
60
+ out = out.with_columns(
61
+ nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
62
+ .then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
63
+ .otherwise(nw.col(f"__{c}_fallback"))
64
+ .alias(c)
65
+ )
66
+ drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
67
+ drop_cols += [f"__{c}_fallback" for c in weighted_cols]
68
+ drop_cols.append("__weight_sum")
69
+ out = out.drop(drop_cols)
70
+
41
71
  return out
42
72
 
43
73
  @nw.narwhalify
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
59
89
  if sample_weight is not None:
60
90
  df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
61
91
 
62
- y_is_numeric = df.select(nw.col("__y")).schema["__y"].is_numeric()
92
+ y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
93
+ non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
94
+ if len(non_uniform) > 0:
95
+ raise ValueError("Target (y) must be uniform within each granularity group")
63
96
 
64
- if y_is_numeric:
65
- agg_exprs = [nw.col("__y").mean().alias("__y")]
66
- else:
67
- agg_exprs = [nw.col("__y").first().alias("__y")]
97
+ agg_exprs = [nw.col("__y").first().alias("__y")]
68
98
 
69
99
  if sample_weight is not None:
70
100
  agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.8
3
+ Version: 0.8.18
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -14,7 +14,7 @@ examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
14
14
  examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
15
15
  examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
16
16
  spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
17
- spforge/autopipeline.py,sha256=ZUwv6Q6O8cD0u5TiSqG6lhW0j16RlSb160AzuOeL2R8,23186
17
+ spforge/autopipeline.py,sha256=rZ6FhJxcgNLvtr3hTVkEiW4BiorgXxADThfMuQ42orE,29866
18
18
  spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
19
19
  spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
20
20
  spforge/features_generator_pipeline.py,sha256=n8vzZKqXNFcFRDWZhllnkhAh5NFXdOD3FEIOpHcay8E,8208
@@ -30,7 +30,7 @@ spforge/estimator/__init__.py,sha256=zIJ4u7WGPOALPx8kVBppBOqklI4lQPl9QBWT8JjjFoY
30
30
  spforge/estimator/_conditional_estimator.py,sha256=JSHpOg5lv3kRv_VzSZ0fKbwCO2dJv9XpyLs9lS81psU,4904
31
31
  spforge/estimator/_frequency_bucketing_classifier.py,sha256=d7wDpOCoKWf-WoXtzwahjtmAozkFdKE3-pzs477WMYc,6055
32
32
  spforge/estimator/_granularity_estimator.py,sha256=pUNmtpDFoOVbS9mHfO-zvidPIKJgWts0y2VnhJ8VWww,3829
33
- spforge/estimator/_group_by_estimator.py,sha256=aXuDvRWvvgK4SEI_DMYscvathmPb6nkMxnqKgG8HC0Y,2769
33
+ spforge/estimator/_group_by_estimator.py,sha256=o-xv_PJJyWBaKv5Eo4EPbOvb9i0CuebZnX4GtEFp_Js,3120
34
34
  spforge/estimator/_ordinal_classifier.py,sha256=j_dfVHeX-6eZgPwwsYbkbP6bPrKH2a5S-N8vfP5hneA,1993
35
35
  spforge/estimator/_sklearn_enhancer_estimator.py,sha256=DZ-UlmeazXPd6uEnlbVv79syZ5FPa64voUyKArtjjUs,4664
36
36
  spforge/feature_generator/__init__.py,sha256=wfLfUkC_lLOCpy7NgDytK-l3HUAuhikuQXdKCgSGbuA,556
@@ -47,32 +47,32 @@ spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8
47
47
  spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
48
48
  spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
49
49
  spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
50
- spforge/performance_transformers/_performance_manager.py,sha256=KwAga6dGhNkXi-MDW6LPjwk6VZwCcjo5L--jnk9aio8,9706
50
+ spforge/performance_transformers/_performance_manager.py,sha256=WmjmlMEnq7y75MiI_s9Y-9eMXIyhPTUKrwsXRtgYp0k,9620
51
51
  spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
52
52
  spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
54
- spforge/ratings/_player_rating.py,sha256=MyqsyLSY6d7_bxDSnF8eWOyXpSCADWGdepdFSGM4cHw,51365
54
+ spforge/ratings/_player_rating.py,sha256=JSTXdaRw_b8ZoZxgmMnZrYG7gPg8GKawqalLd16SK1M,56066
55
55
  spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
56
56
  spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
57
57
  spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
58
58
  spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
59
59
  spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
60
- spforge/ratings/start_rating_generator.py,sha256=_7hIJ9KRVCwsCoY1GIzY8cuOdHR8RH_BCMeMwQG3E04,6776
60
+ spforge/ratings/start_rating_generator.py,sha256=eSasa5Oe9n4IoTGjFCYyFQAGrJtzrBW-Qor97lmaYuM,6776
61
61
  spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
62
- spforge/ratings/team_start_rating_generator.py,sha256=ZJe84sTvE4Yep3d4wKJMMJn2Q4PhcCwkO7Wyd5nsYUA,5110
63
- spforge/ratings/utils.py,sha256=qms5J5SD-FyXDR2G8giDMbu_AoLgI135pjW4nghxROg,3940
62
+ spforge/ratings/team_start_rating_generator.py,sha256=vK-_m8KwcHopchch_lKNHSGLiiNm5q9Lenm0d1cP_po,5110
63
+ spforge/ratings/utils.py,sha256=_zFemqz2jJkH8rn2EZpDt8N6FELUmYp9qCnPzRtOIGU,4497
64
64
  spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
65
- spforge/scorer/_score.py,sha256=TR0T9nJj0aeVgGfOE0fZmXlO66CELulYwxhi7ZAxhvY,56184
65
+ spforge/scorer/_score.py,sha256=kNuqiK3F5mUEAVD7KjWYY7E_AkRrspR362QBm_jyElg,57623
66
66
  spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
67
67
  spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
68
68
  spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
69
69
  spforge/transformers/_operator.py,sha256=jOH7wdMBLg6R2hlH_FU6eA0gjs-Q0vFimTo7fXgKpjI,2964
70
- spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymdjQy5VrsTvJlrA,3152
70
+ spforge/transformers/_other_transformer.py,sha256=w2a7Wnki3vJe4GAkSa4kealw0GILIo6nE_9-3M10owA,4646
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
- tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
74
+ spforge-0.8.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
+ tests/test_autopipeline.py,sha256=7cNAn-nmGolfyfk3THh9IKcHZfRA-pLYC_xAyMg-No4,26863
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
78
78
  tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
@@ -92,12 +92,14 @@ tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7
92
92
  tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
93
93
  tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
94
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
95
- tests/performance_transformers/test_performance_manager.py,sha256=bfC5GiBuzHw-mLmKeEzBUUPuKm0ayax2bsF1j88W8L0,10120
95
+ tests/performance_transformers/test_performance_manager.py,sha256=gjuuV_hb27kCo_kUecPKG3Cbot2Gqis1W3kw2A4ovS4,10690
96
96
  tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
97
- tests/ratings/test_player_rating_generator.py,sha256=FGH3Tq0uFoSlkS_XMldsUKhsovBRBvzH9EbqjKvg2O0,59601
97
+ tests/ratings/test_player_rating_generator.py,sha256=SKLaBQBsHYslc2Nia2AxZ8A9Cy16MbZAWjLyOjvcMnA,64094
98
+ tests/ratings/test_player_rating_no_mutation.py,sha256=GzO3Hl__5K68DS3uRLefwnbcTJOvBM7cZqww4M21UZM,8493
98
99
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
99
100
  tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
100
- tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
101
+ tests/ratings/test_utils_scaled_weights.py,sha256=iHxe6ZDUB_I2B6HT0xTGqXBkl7gRlqVV0e_7Lwun5po,4988
102
+ tests/scorer/test_score.py,sha256=rw3xJs6xqWVpalVMUQz557m2JYGR7PmhrsjfTex0b0c,79121
101
103
  tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
102
104
  tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
103
105
  tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -105,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
105
107
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
106
108
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
107
109
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
108
- spforge-0.8.8.dist-info/METADATA,sha256=fO2JHqnnqOrjkWZ1Zh4rgYg58bi4YzxhSa8I72wqDs4,20047
109
- spforge-0.8.8.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
110
- spforge-0.8.8.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
111
- spforge-0.8.8.dist-info/RECORD,,
110
+ spforge-0.8.18.dist-info/METADATA,sha256=54l0UTrew2ot0_4k22hLKL-oXbQ4hlA1_KAXIqf_umw,20048
111
+ spforge-0.8.18.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
+ spforge-0.8.18.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
+ spforge-0.8.18.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -56,6 +56,21 @@ def test_performance_weights_manager_basic_flow(sample_data):
56
56
  assert output_df["weighted_performance"].iloc[0] == pytest.approx(0.6)
57
57
 
58
58
 
59
+ def test_performance_weights_manager_keeps_mean_when_weights_not_normalized():
60
+ df = pd.DataFrame(
61
+ {
62
+ "feat_a": [0.0, 1.0, 2.0, 3.0],
63
+ "feat_b": [3.0, 2.0, 1.0, 0.0],
64
+ }
65
+ )
66
+ weights = [ColumnWeight(name="feat_a", weight=0.9), ColumnWeight(name="feat_b", weight=0.5)]
67
+
68
+ manager = PerformanceWeightsManager(weights=weights, transformer_names=["min_max"], prefix="")
69
+ output_df = nw.from_native(manager.fit_transform(df)).to_pandas()
70
+
71
+ assert output_df["weighted_performance"].mean() == pytest.approx(0.5, abs=1e-6)
72
+
73
+
59
74
  def test_lower_is_better_logic():
60
75
  df = pd.DataFrame({"feat_a": [1.0, 0.0]})
61
76
  weights = [ColumnWeight(name="feat_a", weight=1.0, lower_is_better=True)]
@@ -551,6 +551,63 @@ def test_fit_transform_scales_participation_weight_by_fit_quantile(base_cn):
551
551
  assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
552
552
 
553
553
 
554
+ def test_fit_transform_auto_scales_participation_weight_when_out_of_bounds(base_cn):
555
+ """Automatically enable scaling when participation weights exceed [0, 1]."""
556
+ df = pl.DataFrame(
557
+ {
558
+ "pid": ["P1", "P2", "O1", "O2"],
559
+ "tid": ["T1", "T1", "T2", "T2"],
560
+ "mid": ["M1", "M1", "M1", "M1"],
561
+ "dt": ["2024-01-01"] * 4,
562
+ "perf": [0.9, 0.9, 0.1, 0.1],
563
+ "pw": [10.0, 20.0, 10.0, 10.0],
564
+ }
565
+ )
566
+ gen = PlayerRatingGenerator(
567
+ performance_column="perf",
568
+ column_names=base_cn,
569
+ auto_scale_performance=True,
570
+ start_harcoded_start_rating=1000.0,
571
+ )
572
+ gen.fit_transform(df)
573
+
574
+ start_rating = 1000.0
575
+ p1_change = gen._player_off_ratings["P1"].rating_value - start_rating
576
+ p2_change = gen._player_off_ratings["P2"].rating_value - start_rating
577
+
578
+ q = df["pw"].quantile(0.99, "linear")
579
+ expected_ratio = min(1.0, 10.0 / q) / min(1.0, 20.0 / q)
580
+
581
+ assert gen.scale_participation_weights is True
582
+ assert p1_change / p2_change == pytest.approx(expected_ratio, rel=1e-6)
583
+
584
+
585
+ def test_fit_transform_auto_scale_logs_warning_when_out_of_bounds(base_cn, caplog):
586
+ """Auto-scaling should emit a warning when participation weights exceed [0, 1]."""
587
+ df = pl.DataFrame(
588
+ {
589
+ "pid": ["P1", "P2", "O1", "O2"],
590
+ "tid": ["T1", "T1", "T2", "T2"],
591
+ "mid": ["M1", "M1", "M1", "M1"],
592
+ "dt": ["2024-01-01"] * 4,
593
+ "perf": [0.9, 0.9, 0.1, 0.1],
594
+ "pw": [10.0, 20.0, 10.0, 10.0],
595
+ }
596
+ )
597
+ gen = PlayerRatingGenerator(
598
+ performance_column="perf",
599
+ column_names=base_cn,
600
+ auto_scale_performance=True,
601
+ start_harcoded_start_rating=1000.0,
602
+ )
603
+ with caplog.at_level("WARNING"):
604
+ gen.fit_transform(df)
605
+
606
+ assert any(
607
+ "Auto-scaling participation weights" in record.message for record in caplog.records
608
+ )
609
+
610
+
554
611
  def test_future_transform_scales_projected_participation_weight_by_fit_quantile():
555
612
  """Future projected participation weights should scale with fit quantile and be clipped."""
556
613
  cn = ColumnNames(
@@ -1689,3 +1746,73 @@ def test_fit_transform__player_rating_difference_from_team_projected_feature(bas
1689
1746
  for row in result.iter_rows(named=True):
1690
1747
  expected = row[player_col] - row[team_col]
1691
1748
  assert row[diff_col] == pytest.approx(expected, rel=1e-9)
1749
+
1750
+
1751
+ def test_fit_transform__start_league_quantile_uses_existing_player_ratings(base_cn):
1752
+ """
1753
+ Bug reproduction: start_league_quantile should use percentile of existing player
1754
+ ratings for new players, but update_players_to_leagues is never called so
1755
+ _league_player_ratings stays empty and all new players get default rating.
1756
+
1757
+ Expected: New player P_NEW should start at 5th percentile of existing ratings (~920)
1758
+ Actual: New player starts at default 1000 because _league_player_ratings is empty
1759
+ """
1760
+ import numpy as np
1761
+
1762
+ num_existing_players = 60
1763
+ player_ids = [f"P{i}" for i in range(num_existing_players)]
1764
+ team_ids = [f"T{i % 2 + 1}" for i in range(num_existing_players)]
1765
+
1766
+ df1 = pl.DataFrame(
1767
+ {
1768
+ "pid": player_ids,
1769
+ "tid": team_ids,
1770
+ "mid": ["M1"] * num_existing_players,
1771
+ "dt": ["2024-01-01"] * num_existing_players,
1772
+ "perf": [0.3 + (i % 10) * 0.07 for i in range(num_existing_players)],
1773
+ "pw": [1.0] * num_existing_players,
1774
+ }
1775
+ )
1776
+
1777
+ gen = PlayerRatingGenerator(
1778
+ performance_column="perf",
1779
+ column_names=base_cn,
1780
+ auto_scale_performance=True,
1781
+ start_league_quantile=0.05,
1782
+ start_min_count_for_percentiles=50,
1783
+ features_out=[RatingKnownFeatures.PLAYER_OFF_RATING],
1784
+ )
1785
+ gen.fit_transform(df1)
1786
+
1787
+ existing_ratings = [
1788
+ gen._player_off_ratings[pid].rating_value for pid in player_ids
1789
+ ]
1790
+ expected_quantile_rating = np.percentile(existing_ratings, 5)
1791
+
1792
+ srg = gen.start_rating_generator
1793
+ assert len(srg._league_player_ratings.get(None, [])) >= 50, (
1794
+ f"Expected _league_player_ratings to have >=50 entries but got "
1795
+ f"{len(srg._league_player_ratings.get(None, []))}. "
1796
+ "update_players_to_leagues is never called."
1797
+ )
1798
+
1799
+ df2 = pl.DataFrame(
1800
+ {
1801
+ "pid": ["P_NEW", "P0"],
1802
+ "tid": ["T1", "T2"],
1803
+ "mid": ["M2", "M2"],
1804
+ "dt": ["2024-01-02", "2024-01-02"],
1805
+ "pw": [1.0, 1.0],
1806
+ }
1807
+ )
1808
+ result = gen.future_transform(df2)
1809
+
1810
+ new_player_start_rating = result.filter(pl.col("pid") == "P_NEW")[
1811
+ "player_off_rating_perf"
1812
+ ][0]
1813
+
1814
+ assert new_player_start_rating == pytest.approx(expected_quantile_rating, rel=0.1), (
1815
+ f"New player should start at 5th percentile ({expected_quantile_rating:.1f}) "
1816
+ f"but got {new_player_start_rating:.1f}. "
1817
+ "start_league_quantile has no effect because update_players_to_leagues is never called."
1818
+ )