spforge 0.8.8__py3-none-any.whl → 0.8.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of spforge has been flagged as potentially problematic in its registry listing.

spforge/autopipeline.py CHANGED
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
     return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))
 
 
+def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
+    """Recursively find innermost estimator with feature_importances_ or coef_."""
+    if hasattr(estimator, "feature_importances_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "feature_importances_")
+
+    if hasattr(estimator, "coef_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "coef_")
+
+    return _get_importance_estimator_inner(estimator)
+
+
+def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
+    """Check wrapped estimators for importance attributes."""
+    # Check estimator_ (sklearn fitted wrapper convention)
+    if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
+        result = _get_importance_estimator(estimator.estimator_)
+        if result is not None:
+            return result
+
+    # Check _est (GroupByEstimator convention)
+    if hasattr(estimator, "_est") and estimator._est is not None:
+        result = _get_importance_estimator(estimator._est)
+        if result is not None:
+            return result
+
+    return None
+
+
 class AutoPipeline(BaseEstimator):
     def __init__(
         self,
@@ -202,6 +236,7 @@ class AutoPipeline(BaseEstimator):
         estimator_features: list[str],
         predictor_transformers: list[PredictorTransformer] | None = None,
         granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
         filters: list[Filter] | None = None,
         scale_features: bool = False,
         categorical_handling: CategoricalHandling = "auto",
@@ -216,6 +251,7 @@ class AutoPipeline(BaseEstimator):
         self.estimator_features = estimator_features
         self.feature_names = estimator_features  # Internal compat
         self.granularity = granularity or []
+        self.aggregation_weight = aggregation_weight
         self.predictor_transformers = predictor_transformers
         self.estimator = estimator
         self.filters = filters or []
@@ -230,6 +266,7 @@ class AutoPipeline(BaseEstimator):
         self.numeric_features = numeric_features
         self.remainder = remainder
         self._cat_feats = []
+        self._filter_feature_names: list[str] = []
 
         # Auto-compute context features
        self.context_feature_names = self._compute_context_features()
@@ -242,11 +279,12 @@ class AutoPipeline(BaseEstimator):
         self._resolved_categorical_handling: CategoricalHandling | None = None
 
     def _compute_context_features(self) -> list[str]:
-        """Auto-compute context features from estimator, granularity, and filters.
+        """Auto-compute context features from estimator and granularity.
 
         Note: Context from predictor_transformers is tracked separately in
         context_predictor_transformer_feature_names and is dropped before
-        the final estimator.
+        the final estimator. Filter columns are tracked separately and are
+        dropped before the final estimator.
         """
         from spforge.transformers._base import PredictorTransformer
 
@@ -290,9 +328,15 @@ class AutoPipeline(BaseEstimator):
         # Add granularity columns
         context.extend(self.granularity)
 
+        # Add aggregation weight column
+        if self.aggregation_weight:
+            context.append(self.aggregation_weight)
+
         # Add filter columns
+        self._filter_feature_names = []
         for f in self.filters:
-            context.append(f.column_name)
+            if f.column_name not in self._filter_feature_names:
+                self._filter_feature_names.append(f.column_name)
 
         # Dedupe while preserving order, excluding estimator_features
         seen = set()
@@ -454,7 +498,11 @@ class AutoPipeline(BaseEstimator):
         pre = PreprocessorToDataFrame(pre_raw)
 
         est = (
-            GroupByEstimator(self.estimator, granularity=[f"{c}" for c in self.granularity])
+            GroupByEstimator(
+                self.estimator,
+                granularity=[f"{c}" for c in self.granularity],
+                aggregation_weight=self.aggregation_weight,
+            )
             if do_groupby
             else self.estimator
         )
@@ -506,8 +554,10 @@ class AutoPipeline(BaseEstimator):
             prev_transformer_feats_out.extend(feats_out)
 
         # Use FunctionTransformer with global function for serializability
+        drop_filter_cols = set(self._filter_feature_names)
+        drop_cols = drop_ctx_set | drop_filter_cols
         final = FunctionTransformer(
-            _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_ctx_set}
+            _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_cols}
         )
         steps.append(("final", final))
 
@@ -538,6 +588,7 @@ class AutoPipeline(BaseEstimator):
                 self.feature_names
                 + self.context_feature_names
                 + self.context_predictor_transformer_feature_names
+                + self._filter_feature_names
                 + self.granularity
             )
         )
@@ -626,4 +677,117 @@ class AutoPipeline(BaseEstimator):
             if ctx not in all_features:
                 all_features.append(ctx)
 
+        # Add filter columns (needed for fit-time filtering)
+        for col in self._filter_feature_names:
+            if col not in all_features:
+                all_features.append(col)
+
         return all_features
+
+    def _get_estimator_feature_names(self) -> list[str]:
+        """Get feature names as seen by the final estimator after all transformations."""
+        pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
+
+        # Remove context columns dropped by "final" step
+        final_step = self.sklearn_pipeline.named_steps["final"]
+        drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
+        features = [f for f in pre_out if f not in drop_cols]
+
+        # Remove granularity columns (dropped by GroupByEstimator)
+        granularity_set = set(self.granularity)
+        features = [f for f in features if f not in granularity_set]
+
+        # Remove context features (used by wrapper estimators, not inner model)
+        context_set = set(self.context_feature_names)
+        features = [f for f in features if f not in context_set]
+
+        # Remove filter columns (used only for fit-time filtering)
+        filter_set = set(self._filter_feature_names)
+        features = [f for f in features if f not in filter_set]
+
+        return features
+
+    def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
+        names = None
+        if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
+            names = list(estimator.feature_names_in_)
+        elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
+            names = list(estimator.feature_name_)
+        elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
+            names = list(estimator.feature_names_)
+        if names is None:
+            names = self._get_estimator_feature_names()
+        if len(names) != n_features:
+            raise ValueError(
+                f"Feature names length ({len(names)}) does not match importances length ({n_features})."
+            )
+        return names
+
+    @property
+    def feature_importances_(self) -> pd.DataFrame:
+        """Get feature importances from the fitted estimator.
+
+        Returns a DataFrame with columns ["feature", "importance"] sorted by
+        absolute importance descending. Works with tree-based models
+        (feature_importances_) and linear models (coef_).
+        """
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            # Linear models: use absolute value of coefficients
+            if raw.ndim == 2:
+                # Multi-class: average absolute values across classes
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        feature_names = self._get_estimator_feature_names()
+
+        df = pd.DataFrame({"feature": feature_names, "importance": importances})
+        df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
+        return df
+
+    @property
+    def feature_importance_names(self) -> dict[str, float]:
+        """Map deepest estimator feature names to importances."""
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            if raw.ndim == 2:
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        importances = np.asarray(importances)
+        feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
+        return dict(zip(feature_names, importances.tolist()))
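Usage note: once an AutoPipeline has been fitted, the two new accessors above can be read directly. A minimal sketch, assuming pipe is an already-fitted AutoPipeline (construction and fitting are omitted here):

imp_df = pipe.feature_importances_        # pd.DataFrame with columns ["feature", "importance"], sorted by |importance|
top_features = imp_df.head(10)["feature"].tolist()
imp_map = pipe.feature_importance_names   # dict[str, float] keyed by the inner estimator's feature names
# Both accessors raise RuntimeError if the pipeline is not fitted, or if the unwrapped
# estimator exposes neither feature_importances_ nor coef_.
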
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer
 
 
 class GroupByEstimator(BaseEstimator):
-    def __init__(self, estimator: Any, granularity: list[str] | None = None):
+    def __init__(
+        self,
+        estimator: Any,
+        granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
+    ):
         self.estimator = estimator
         self.granularity = granularity or []
-        self._reducer = GroupByReducer(self.granularity)
+        self.aggregation_weight = aggregation_weight
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
         self._est = None
 
     def __sklearn_is_fitted__(self):
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
     @nw.narwhalify
     def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
         X = X.to_pandas()
-        self._reducer = GroupByReducer(self.granularity)
+        # Backwards compatibility: old pickled objects may not have aggregation_weight
+        agg_weight = getattr(self, "aggregation_weight", None)
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
         X_red = nw.from_native(self._reducer.fit_transform(X))
         y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)
 
 
@@ -250,8 +250,6 @@ class PerformanceWeightsManager(PerformanceManager):
             )
         )
 
-        sum_weight = sum([w.weight for w in self.weights])
-
         for column_weight in self.weights:
             weight_col = f"weight__{column_weight.name}"
             feature_col = column_weight.name
@@ -261,14 +259,14 @@ class PerformanceWeightsManager(PerformanceManager):
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * (1 - nw.col(feature_name)))
+                        + (nw.col(weight_col) * (1 - nw.col(feature_name)))
                    ).alias(tmp_out_performance_colum_name)
                 )
             else:
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * nw.col(feature_name))
+                        + (nw.col(weight_col) * nw.col(feature_name))
                     ).alias(tmp_out_performance_colum_name)
                 )
 
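Removing sum_weight changes how column weights combine: each weight is now applied as given instead of being divided by the sum of all weights. A small numeric illustration with made-up values (weights 2 and 3, feature values 0.5 and 1.0):

old_contribution = (2 / (2 + 3)) * 0.5 + (3 / (2 + 3)) * 1.0   # 0.8, weights normalized to sum to 1
new_contribution = 2 * 0.5 + 3 * 1.0                           # 4.0, weights applied as given
# Weight sets that already sum to 1 produce identical results under both versions.
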
@@ -3,6 +3,7 @@ from __future__ import annotations
 
 import copy
 import math
+import logging
 from typing import Any, Literal
 
 import narwhals.stable.v2 as nw
@@ -15,6 +16,7 @@ from spforge.data_structures import (
     MatchPerformance,
     MatchPlayer,
     PlayerRating,
+    PlayerRatingChange,
     PlayerRatingsResult,
     PreMatchPlayerRating,
     PreMatchPlayersCollection,
@@ -33,6 +35,8 @@ from spforge.ratings.utils import (
 from spforge.feature_generator._utils import to_polars
 
 PLAYER_STATS = "__PLAYER_STATS"
+_SCALED_PW = "__scaled_participation_weight__"
+_SCALED_PPW = "__scaled_projected_participation_weight__"
 
 
 class PlayerRatingGenerator(RatingGenerator):
@@ -75,12 +79,13 @@ class PlayerRatingGenerator(RatingGenerator):
         start_min_count_for_percentiles: int = 50,
         start_team_rating_subtract: float = 80,
         start_team_weight: float = 0,
-        start_max_days_ago_league_entities: int = 120,
+        start_max_days_ago_league_entities: int = 600,
         start_min_match_count_team_rating: int = 2,
         start_harcoded_start_rating: float | None = None,
         column_names: ColumnNames | None = None,
         output_suffix: str | None = None,
         scale_participation_weights: bool = False,
+        auto_scale_participation_weights: bool = True,
         **kwargs: Any,
     ):
         super().__init__(
@@ -164,6 +169,7 @@ class PlayerRatingGenerator(RatingGenerator):
 
         self.use_off_def_split = bool(use_off_def_split)
         self.scale_participation_weights = bool(scale_participation_weights)
+        self.auto_scale_participation_weights = bool(auto_scale_participation_weights)
         self._participation_weight_max: float | None = None
         self._projected_participation_weight_max: float | None = None
 
@@ -189,9 +195,39 @@ class PlayerRatingGenerator(RatingGenerator):
         column_names: ColumnNames | None = None,
     ) -> DataFrame | IntoFrameT:
         self.column_names = column_names if column_names else self.column_names
+        self._maybe_enable_participation_weight_scaling(df)
         self._set_participation_weight_max(df)
         return super().fit_transform(df, column_names)
 
+    def _maybe_enable_participation_weight_scaling(self, df: DataFrame) -> None:
+        if self.scale_participation_weights or not self.auto_scale_participation_weights:
+            return
+        cn = self.column_names
+        if not cn:
+            return
+
+        pl_df = df.to_native() if df.implementation.is_polars() else df.to_polars().to_native()
+
+        def _out_of_bounds(col_name: str | None) -> bool:
+            if not col_name or col_name not in df.columns:
+                return False
+            col = pl_df[col_name]
+            min_val = col.min()
+            max_val = col.max()
+            if min_val is None or max_val is None:
+                return False
+            eps = 1e-6
+            return min_val < -eps or max_val > (1.0 + eps)
+
+        if _out_of_bounds(cn.participation_weight) or _out_of_bounds(
+            cn.projected_participation_weight
+        ):
+            self.scale_participation_weights = True
+            logging.warning(
+                "Auto-scaling participation weights because values exceed [0, 1]. "
+                "Set scale_participation_weights=True explicitly to silence this warning."
+            )
+
     def _ensure_player_off(self, player_id: str) -> PlayerRating:
         if player_id not in self._player_off_ratings:
             # create with start generator later; initialize to 0 now; overwritten when needed
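The auto-scaling check above only inspects column bounds. A standalone sketch of the same out-of-bounds test on a polars column, with illustrative data:

import polars as pl

participation = pl.Series("participation_weight", [90.0, 45.0, 0.0])  # e.g. raw minutes played
eps = 1e-6
out_of_bounds = participation.min() < -eps or participation.max() > 1.0 + eps
print(out_of_bounds)  # True -> scale_participation_weights is switched on and a warning is logged
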
@@ -240,6 +276,7 @@ class PlayerRatingGenerator(RatingGenerator):
             self._projected_participation_weight_max = self._participation_weight_max
 
     def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Create internal scaled participation weight columns without mutating originals."""
         if not self.scale_participation_weights:
             return df
         if self._participation_weight_max is None or self._participation_weight_max <= 0:
@@ -254,7 +291,7 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.participation_weight)
+                .alias(_SCALED_PW)
             )
 
         if (
@@ -267,16 +304,38 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.projected_participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.projected_participation_weight)
+                .alias(_SCALED_PPW)
             )
 
         return df
 
+    def _get_participation_weight_col(self) -> str:
+        """Get the column name to use for participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.participation_weight:
+            return _SCALED_PW
+        return cn.participation_weight if cn else ""
+
+    def _get_projected_participation_weight_col(self) -> str:
+        """Get the column name to use for projected participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.projected_participation_weight:
+            return _SCALED_PPW
+        return cn.projected_participation_weight if cn else ""
+
+    def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Remove internal scaled columns before returning."""
+        cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
+        if cols_to_drop:
+            df = df.drop(cols_to_drop)
+        return df
+
     def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_ratings(match_df)
 
+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
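The scaling in _scale_participation_weight_columns above divides by a denominator derived from the maximum recorded by _set_participation_weight_max, clips to [0, 1], and writes the result to the internal _SCALED_PW / _SCALED_PPW columns instead of overwriting the originals; _remove_internal_scaled_columns drops them again before results are returned. A standalone polars sketch of that normalize-and-clip step, with illustrative data and an assumed max of 90:

import polars as pl

df = pl.DataFrame({"participation_weight": [90.0, 45.0, 120.0]})
participation_weight_max = 90.0  # stands in for the value recorded at fit time
df = df.with_columns(
    (pl.col("participation_weight") / participation_weight_max)
    .clip(0.0, 1.0)
    .alias("__scaled_participation_weight__")
)
print(df)  # scaled values: 1.0, 0.5, 1.0 (120 / 90 is clipped to 1.0)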
@@ -296,13 +355,15 @@ class PlayerRatingGenerator(RatingGenerator):
             on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
         )
 
-        return self._add_rating_features(df)
+        result = self._add_rating_features(df)
+        return self._remove_internal_scaled_columns(result)
 
     def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_future_ratings(match_df)
 
+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
@@ -327,7 +388,8 @@ class PlayerRatingGenerator(RatingGenerator):
             how="left",
         )
 
-        return self._add_rating_features(df_with_ratings)
+        result = self._add_rating_features(df_with_ratings)
+        return self._remove_internal_scaled_columns(result)
 
     def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
         cn = self.column_names
@@ -381,9 +443,9 @@ class PlayerRatingGenerator(RatingGenerator):
         team1_off_rating, team1_def_rating = self._team_off_def_rating_from_collection(c1)
         team2_off_rating, team2_def_rating = self._team_off_def_rating_from_collection(c2)
 
-        player_updates: list[tuple[str, str, float, float, float, float, float, float, int]] = (
-            []
-        )
+        player_updates: list[
+            tuple[str, str, float, float, float, float, float, float, int, str | None]
+        ] = []
 
         for pre_player in c1.pre_match_player_ratings:
             pid = pre_player.id
@@ -459,6 +521,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )
 
@@ -536,6 +599,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )
 
@@ -550,6 +614,7 @@ class PlayerRatingGenerator(RatingGenerator):
             _off_change,
             _def_change,
             _dn,
+            _league,
         ) in player_updates:
             out[cn.player_id].append(pid)
             out[cn.match_id].append(match_id)
@@ -566,15 +631,18 @@ class PlayerRatingGenerator(RatingGenerator):
         for (
             pid,
             team_id,
-            _off_pre,
+            off_pre,
             _def_pre,
             _pred_off,
             _pred_def,
             off_change,
             def_change,
             dn,
+            league,
         ) in player_updates:
-            pending_team_updates.append((pid, team_id, off_change, def_change, dn))
+            pending_team_updates.append(
+                (pid, team_id, off_pre, off_change, def_change, dn, league)
+            )
 
         if last_update_id is None:
             last_update_id = update_id
@@ -584,9 +652,11 @@ class PlayerRatingGenerator(RatingGenerator):
 
         return pl.DataFrame(out, strict=False)
 
-    def _apply_player_updates(self, updates: list[tuple[str, str, float, float, int]]) -> None:
+    def _apply_player_updates(
+        self, updates: list[tuple[str, str, float, float, float, int, str | None]]
+    ) -> None:
 
-        for player_id, team_id, off_change, def_change, day_number in updates:
+        for player_id, team_id, pre_rating, off_change, def_change, day_number, league in updates:
             off_state = self._player_off_ratings[player_id]
             off_state.confidence_sum = self._calculate_post_match_confidence_sum(
                 entity_rating=off_state,
@@ -609,6 +679,19 @@ class PlayerRatingGenerator(RatingGenerator):
             def_state.last_match_day_number = int(day_number)
             def_state.most_recent_team_id = team_id
 
+            self.start_rating_generator.update_players_to_leagues(
+                PlayerRatingChange(
+                    id=player_id,
+                    day_number=day_number,
+                    league=league,
+                    participation_weight=1.0,
+                    predicted_performance=0.0,
+                    performance=0.0,
+                    pre_match_rating_value=pre_rating,
+                    rating_change_value=off_change,
+                )
+            )
+
     def _add_rating_features(self, df: pl.DataFrame) -> pl.DataFrame:
         cols_to_add = set((self._features_out or []) + (self.non_predictor_features_out or []))
 
@@ -763,9 +846,13 @@ class PlayerRatingGenerator(RatingGenerator):
 
         if cn.participation_weight and cn.participation_weight in df.columns:
             player_stat_cols.append(cn.participation_weight)
+        if _SCALED_PW in df.columns:
+            player_stat_cols.append(_SCALED_PW)
 
         if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
             player_stat_cols.append(cn.projected_participation_weight)
+        if _SCALED_PPW in df.columns:
+            player_stat_cols.append(_SCALED_PPW)
 
         if cn.position and cn.position in df.columns:
             player_stat_cols.append(cn.position)
@@ -821,14 +908,23 @@ class PlayerRatingGenerator(RatingGenerator):
             position = team_player.get(cn.position)
             player_league = team_player.get(cn.league, None)
 
-            participation_weight = (
-                team_player.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
-            )
-            projected_participation_weight = (
-                team_player.get(cn.projected_participation_weight, participation_weight)
-                if cn.projected_participation_weight
-                else participation_weight
-            )
+            # Use scaled participation weight if available, otherwise use original
+            if _SCALED_PW in team_player:
+                participation_weight = team_player.get(_SCALED_PW, 1.0)
+            elif cn.participation_weight:
+                participation_weight = team_player.get(cn.participation_weight, 1.0)
+            else:
+                participation_weight = 1.0
+
+            # Use scaled projected participation weight if available, otherwise use original
+            if _SCALED_PPW in team_player:
+                projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
+            elif cn.projected_participation_weight:
+                projected_participation_weight = team_player.get(
+                    cn.projected_participation_weight, participation_weight
+                )
+            else:
+                projected_participation_weight = participation_weight
             projected_participation_weights.append(projected_participation_weight)
 
             perf_val = (
@@ -1054,14 +1150,21 @@ class PlayerRatingGenerator(RatingGenerator):
                 position = tp.get(cn.position)
                 league = tp.get(cn.league, None)
 
-                pw = (
-                    tp.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
-                )
-                ppw = (
-                    tp.get(cn.projected_participation_weight, pw)
-                    if cn.projected_participation_weight
-                    else pw
-                )
+                # Use scaled participation weight if available, otherwise use original
+                if _SCALED_PW in tp:
+                    pw = tp.get(_SCALED_PW, 1.0)
+                elif cn.participation_weight:
+                    pw = tp.get(cn.participation_weight, 1.0)
+                else:
+                    pw = 1.0
+
+                # Use scaled projected participation weight if available, otherwise use original
+                if _SCALED_PPW in tp:
+                    ppw = tp.get(_SCALED_PPW, pw)
+                elif cn.projected_participation_weight:
+                    ppw = tp.get(cn.projected_participation_weight, pw)
+                else:
+                    ppw = pw
                 proj_w.append(float(ppw))
 
                 mp = MatchPerformance(
@@ -28,7 +28,7 @@ class StartRatingGenerator:
         min_count_for_percentiles: int = 50,
         team_rating_subtract: float = 80,
         team_weight: float = 0,
-        max_days_ago_league_entities: int = 120,
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):
@@ -24,7 +24,7 @@ class TeamStartRatingGenerator:
         league_ratings: dict[str, float] | None = None,
         league_quantile: float = 0.2,
         min_count_for_percentiles: int = 50,
-        max_days_ago_league_entities: int = 120,
+        max_days_ago_league_entities: int = 600,
         min_match_count_team_rating: int = 2,
         harcoded_start_rating: float | None = None,
     ):