spforge 0.8.4__py3-none-any.whl → 0.8.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spforge might be problematic.

Files changed (37)
  1. examples/lol/pipeline_transformer_example.py +69 -86
  2. examples/nba/cross_validation_example.py +4 -11
  3. examples/nba/feature_engineering_example.py +33 -15
  4. examples/nba/game_winner_example.py +24 -14
  5. examples/nba/predictor_transformers_example.py +29 -16
  6. spforge/__init__.py +1 -0
  7. spforge/autopipeline.py +169 -5
  8. spforge/estimator/_group_by_estimator.py +11 -3
  9. spforge/features_generator_pipeline.py +8 -4
  10. spforge/hyperparameter_tuning/__init__.py +12 -0
  11. spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
  12. spforge/hyperparameter_tuning/_tuner.py +192 -0
  13. spforge/performance_transformers/_performance_manager.py +2 -4
  14. spforge/ratings/__init__.py +4 -0
  15. spforge/ratings/_player_rating.py +142 -28
  16. spforge/ratings/league_start_rating_optimizer.py +201 -0
  17. spforge/ratings/start_rating_generator.py +1 -1
  18. spforge/ratings/team_start_rating_generator.py +1 -1
  19. spforge/ratings/utils.py +16 -6
  20. spforge/scorer/_score.py +42 -11
  21. spforge/transformers/_other_transformer.py +38 -8
  22. {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/METADATA +12 -19
  23. {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/RECORD +37 -31
  24. {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/WHEEL +1 -1
  25. tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
  26. tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
  27. tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
  28. tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
  29. tests/performance_transformers/test_performance_manager.py +15 -0
  30. tests/ratings/test_player_rating_generator.py +154 -0
  31. tests/ratings/test_player_rating_no_mutation.py +214 -0
  32. tests/ratings/test_utils_scaled_weights.py +136 -0
  33. tests/scorer/test_score.py +232 -0
  34. tests/test_autopipeline.py +336 -6
  35. tests/test_feature_generator_pipeline.py +43 -0
  36. {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/licenses/LICENSE +0 -0
  37. {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/top_level.txt +0 -0
@@ -45,6 +45,8 @@ class ParamSpec:
         elif self.param_type == "int":
             if self.low is None or self.high is None:
                 raise ValueError(f"int parameter '{name}' requires low and high bounds")
+            if self.step is None:
+                return trial.suggest_int(name, int(self.low), int(self.high))
             return trial.suggest_int(name, int(self.low), int(self.high), step=self.step)
         elif self.param_type == "categorical":
             if self.choices is None:
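The new branch matters because Optuna's suggest_int expects an integer step (the default is 1), so forwarding step=None would fail; omitting the argument is the correct fallback. A minimal standalone sketch of the same dispatch (the helper and objective names are illustrative, not part of spforge):

    import optuna

    def suggest_int_param(trial, name, low, high, step=None):
        # Optuna's IntDistribution rejects step=None, so only forward step
        # when it was explicitly configured; otherwise use the default of 1.
        if step is None:
            return trial.suggest_int(name, low, high)
        return trial.suggest_int(name, low, high, step=step)

    def objective(trial):
        n = suggest_int_param(trial, "n_estimators", 50, 500)
        return float(n)

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=5)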
@@ -272,3 +274,193 @@ class RatingHyperparameterTuner:
                 raise ValueError("Scorer returned invalid values in dict")
             return float(np.mean(values))
         return float(score)
+
+
+def _is_estimator(obj: object) -> bool:
+    return hasattr(obj, "get_params") and hasattr(obj, "set_params")
+
+
+def _get_leaf_estimator_paths(estimator: Any) -> dict[str, Any]:
+    if not _is_estimator(estimator):
+        raise ValueError("estimator must implement get_params and set_params")
+
+    params = estimator.get_params(deep=True)
+    estimator_keys = [k for k, v in params.items() if _is_estimator(v)]
+
+    if not estimator_keys:
+        return {"": estimator}
+
+    leaves: list[str] = []
+    for key in estimator_keys:
+        if not any(other != key and other.startswith(f"{key}__") for other in estimator_keys):
+            leaves.append(key)
+
+    return {key: params[key] for key in sorted(leaves)}
+
+
+def _build_search_space_for_targets(
+    targets: dict[str, dict[str, ParamSpec]],
+) -> dict[str, ParamSpec]:
+    search_space: dict[str, ParamSpec] = {}
+    for path, params in targets.items():
+        for param_name, param_spec in params.items():
+            full_name = f"{path}__{param_name}" if path else param_name
+            if full_name in search_space:
+                raise ValueError(f"Duplicate parameter name detected: {full_name}")
+            search_space[full_name] = param_spec
+    return search_space
+
+
+def _enqueue_predicted_r_weight_zero(study: optuna.Study, search_space: dict[str, ParamSpec]):
+    zero_params: dict[str, float] = {}
+    for name, spec in search_space.items():
+        if not name.endswith("predicted_r_weight"):
+            continue
+        if spec.param_type not in {"float", "int"}:
+            continue
+        if spec.low is None or spec.high is None:
+            continue
+        if spec.low <= 0 <= spec.high:
+            zero_params[name] = 0.0
+
+    if zero_params:
+        study.enqueue_trial(zero_params)
+
+
+class EstimatorHyperparameterTuner:
+    """
+    Hyperparameter tuner for sklearn-compatible estimators.
+
+    Supports nested estimators and can target deepest leaf estimators.
+    """
+
+    def __init__(
+        self,
+        estimator: Any,
+        cross_validator: MatchKFoldCrossValidator,
+        scorer: BaseScorer,
+        direction: Literal["minimize", "maximize"],
+        param_search_space: dict[str, ParamSpec] | None = None,
+        param_targets: dict[str, dict[str, ParamSpec]] | None = None,
+        n_trials: int = 50,
+        n_jobs: int = 1,
+        storage: str | None = None,
+        study_name: str | None = None,
+        timeout: float | None = None,
+        show_progress_bar: bool = True,
+        sampler: optuna.samplers.BaseSampler | None = None,
+        pruner: optuna.pruners.BasePruner | None = None,
+    ):
+        self.estimator = estimator
+        self.cross_validator = cross_validator
+        self.scorer = scorer
+        self.direction = direction
+        self.param_search_space = param_search_space
+        self.param_targets = param_targets
+        self.n_trials = n_trials
+        self.n_jobs = n_jobs
+        self.storage = storage
+        self.study_name = study_name
+        self.timeout = timeout
+        self.show_progress_bar = show_progress_bar
+        self.sampler = sampler
+        self.pruner = pruner
+
+        if direction not in ["minimize", "maximize"]:
+            raise ValueError(f"direction must be 'minimize' or 'maximize', got: {direction}")
+
+        if storage is not None and study_name is None:
+            raise ValueError("study_name is required when using storage")
+
+        if param_search_space is not None and param_targets is not None:
+            raise ValueError("param_search_space and param_targets cannot both be provided")
+
+    def optimize(self, df: IntoFrameT) -> OptunaResult:
+        from spforge.hyperparameter_tuning._default_search_spaces import (
+            get_default_estimator_search_space,
+        )
+
+        leaf_estimators = _get_leaf_estimator_paths(self.estimator)
+        default_targets = {
+            path: get_default_estimator_search_space(est)
+            for path, est in leaf_estimators.items()
+        }
+        default_targets = {path: space for path, space in default_targets.items() if space}
+
+        if self.param_targets is not None:
+            unknown = set(self.param_targets) - set(leaf_estimators)
+            if unknown:
+                raise ValueError(f"param_targets contains unknown estimator paths: {unknown}")
+            targets = self.param_targets
+        elif self.param_search_space is not None:
+            targets = {path: self.param_search_space for path in leaf_estimators}
+        elif default_targets:
+            targets = default_targets
+        else:
+            raise ValueError(
+                "param_search_space is required when no default search space is available"
+            )
+
+        search_space = _build_search_space_for_targets(targets)
+        if not search_space:
+            raise ValueError("Resolved search space is empty")
+
+        study = optuna.create_study(
+            direction=self.direction,
+            sampler=self.sampler,
+            pruner=self.pruner,
+            storage=self.storage,
+            study_name=self.study_name,
+            load_if_exists=True if self.storage else False,
+        )
+
+        _enqueue_predicted_r_weight_zero(study, search_space)
+
+        study.optimize(
+            lambda trial: self._objective(trial, df, search_space),
+            n_trials=self.n_trials,
+            n_jobs=self.n_jobs,
+            timeout=self.timeout,
+            show_progress_bar=self.show_progress_bar,
+        )
+
+        return OptunaResult(
+            best_params=study.best_params,
+            best_value=study.best_value,
+            best_trial=study.best_trial,
+            study=study,
+        )
+
+    def _objective(
+        self, trial: optuna.Trial, df: IntoFrameT, search_space: dict[str, ParamSpec]
+    ) -> float:
+        try:
+            trial_params = self._suggest_params(trial, search_space)
+
+            copied_estimator = copy.deepcopy(self.estimator)
+            copied_estimator.set_params(**trial_params)
+
+            cv = copy.deepcopy(self.cross_validator)
+            cv.estimator = copied_estimator
+
+            validation_df = cv.generate_validation_df(df)
+            score = self.scorer.score(validation_df)
+            score_value = RatingHyperparameterTuner._aggregate_score(score)
+
+            if math.isnan(score_value) or math.isinf(score_value):
+                logger.warning(f"Trial {trial.number} returned invalid score: {score_value}")
+                return float("inf") if self.direction == "minimize" else float("-inf")
+
+            return score_value
+
+        except Exception as e:
+            logger.warning(f"Trial {trial.number} failed with error: {e}")
+            return float("inf") if self.direction == "minimize" else float("-inf")
+
+    def _suggest_params(
+        self, trial: optuna.Trial, search_space: dict[str, ParamSpec]
+    ) -> dict[str, Any]:
+        params: dict[str, Any] = {}
+        for param_name, param_spec in search_space.items():
+            params[param_name] = param_spec.suggest(trial, param_name)
+        return params
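Because _get_leaf_estimator_paths walks get_params(deep=True), any sklearn-compatible composite resolves to its innermost estimators, addressed with sklearn's double-underscore path syntax. A runnable sketch of that resolution with plain sklearn objects (the helper names are local to this example; spforge's private helpers behave the same way per the diff above):

    from sklearn.compose import TransformedTargetRegressor
    from sklearn.linear_model import Ridge
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    def is_estimator(obj) -> bool:
        return hasattr(obj, "get_params") and hasattr(obj, "set_params")

    def leaf_estimator_paths(estimator) -> dict:
        params = estimator.get_params(deep=True)
        keys = [k for k, v in params.items() if is_estimator(v)]
        if not keys:
            return {"": estimator}
        # A key is a leaf when no other estimator key nests beneath it.
        return {
            k: params[k]
            for k in sorted(keys)
            if not any(o != k and o.startswith(f"{k}__") for o in keys)
        }

    pipe = Pipeline(
        [("scale", StandardScaler()), ("model", TransformedTargetRegressor(regressor=Ridge()))]
    )
    print(leaf_estimator_paths(pipe))
    # {'model__regressor': Ridge(), 'scale': StandardScaler()}

Note that transformers such as StandardScaler count as "estimators" here because they also expose get_params and set_params; when falling back to defaults, optimize drops any leaf for which no default search space exists.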
@@ -250,8 +250,6 @@ class PerformanceWeightsManager(PerformanceManager):
             )
         )

-        sum_weight = sum([w.weight for w in self.weights])
-
         for column_weight in self.weights:
             weight_col = f"weight__{column_weight.name}"
             feature_col = column_weight.name
@@ -261,14 +259,14 @@ class PerformanceWeightsManager(PerformanceManager):
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * (1 - nw.col(feature_name)))
+                        + (nw.col(weight_col) * (1 - nw.col(feature_name)))
                     ).alias(tmp_out_performance_colum_name)
                 )
             else:
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * nw.col(feature_name))
+                        + (nw.col(weight_col) * nw.col(feature_name))
                     ).alias(tmp_out_performance_colum_name)
                 )

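With the sum_weight normalization removed, the weight columns are applied as-is, so weights that do not sum to 1 now change the output's scale. A tiny numeric illustration (plain Python, not spforge code):

    weights = {"points": 2.0, "assists": 1.0}
    features = {"points": 0.8, "assists": 0.5}

    total = sum(weights.values())
    old = sum(w / total * features[name] for name, w in weights.items())  # 0.7
    new = sum(w * features[name] for name, w in weights.items())          # 2.1

    print(old, new)

If the configured weights are already normalized to sum to 1, the two formulas agree.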
@@ -6,3 +6,7 @@ from .enums import (
     RatingUnknownFeatures as RatingUnknownFeatures,
 )
 from .league_identifier import LeagueIdentifier as LeagueIdentifier
+from .league_start_rating_optimizer import (
+    LeagueStartRatingOptimizationResult as LeagueStartRatingOptimizationResult,
+    LeagueStartRatingOptimizer as LeagueStartRatingOptimizer,
+)
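With these re-exports in place, downstream code can import the optimizer from the subpackage root (the optimizer itself lives in the new league_start_rating_optimizer.py module, which this excerpt does not show):

    from spforge.ratings import (
        LeagueStartRatingOptimizationResult,
        LeagueStartRatingOptimizer,
    )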
@@ -3,6 +3,7 @@ from __future__ import annotations

 import copy
 import math
+import logging
 from typing import Any, Literal

 import narwhals.stable.v2 as nw
@@ -15,6 +16,7 @@ from spforge.data_structures import (
     MatchPerformance,
     MatchPlayer,
     PlayerRating,
+    PlayerRatingChange,
     PlayerRatingsResult,
     PreMatchPlayerRating,
     PreMatchPlayersCollection,
@@ -33,6 +35,8 @@ from spforge.ratings.utils import (
 from spforge.feature_generator._utils import to_polars

 PLAYER_STATS = "__PLAYER_STATS"
+_SCALED_PW = "__scaled_participation_weight__"
+_SCALED_PPW = "__scaled_projected_participation_weight__"


 class PlayerRatingGenerator(RatingGenerator):
@@ -75,12 +79,13 @@ class PlayerRatingGenerator(RatingGenerator):
         start_min_count_for_percentiles: int = 50,
         start_team_rating_subtract: float = 80,
         start_team_weight: float = 0,
-        start_max_days_ago_league_entities: int = 120,
+        start_max_days_ago_league_entities: int = 600,
         start_min_match_count_team_rating: int = 2,
         start_harcoded_start_rating: float | None = None,
         column_names: ColumnNames | None = None,
         output_suffix: str | None = None,
         scale_participation_weights: bool = False,
+        auto_scale_participation_weights: bool = True,
         **kwargs: Any,
     ):
         super().__init__(
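A usage sketch for the two constructor changes; every other argument is elided, and the import path is an assumption rather than something this diff shows:

    from spforge.ratings import PlayerRatingGenerator

    generator = PlayerRatingGenerator(
        # Default widened from 120 to 600 days in this release.
        start_max_days_ago_league_entities=600,
        # New flag: set to False to opt out of the automatic [0, 1]
        # participation-weight scaling introduced below.
        auto_scale_participation_weights=True,
    )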
@@ -129,6 +134,9 @@ class PlayerRatingGenerator(RatingGenerator):
             str(RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_PROJECTED)
         )
         self.MEAN_PROJ_COL = self._suffix(str(RatingKnownFeatures.RATING_MEAN_PROJECTED))
+        self.PLAYER_DIFF_FROM_TEAM_PROJ_COL = self._suffix(
+            str(RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED)
+        )

         self.TEAM_OFF_RATING_PROJ_COL = self._suffix(
             str(RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED)
@@ -161,6 +169,7 @@ class PlayerRatingGenerator(RatingGenerator):

         self.use_off_def_split = bool(use_off_def_split)
         self.scale_participation_weights = bool(scale_participation_weights)
+        self.auto_scale_participation_weights = bool(auto_scale_participation_weights)
         self._participation_weight_max: float | None = None
         self._projected_participation_weight_max: float | None = None

@@ -186,9 +195,39 @@ class PlayerRatingGenerator(RatingGenerator):
         column_names: ColumnNames | None = None,
     ) -> DataFrame | IntoFrameT:
         self.column_names = column_names if column_names else self.column_names
+        self._maybe_enable_participation_weight_scaling(df)
         self._set_participation_weight_max(df)
         return super().fit_transform(df, column_names)

+    def _maybe_enable_participation_weight_scaling(self, df: DataFrame) -> None:
+        if self.scale_participation_weights or not self.auto_scale_participation_weights:
+            return
+        cn = self.column_names
+        if not cn:
+            return
+
+        pl_df = df.to_native() if df.implementation.is_polars() else df.to_polars().to_native()
+
+        def _out_of_bounds(col_name: str | None) -> bool:
+            if not col_name or col_name not in df.columns:
+                return False
+            col = pl_df[col_name]
+            min_val = col.min()
+            max_val = col.max()
+            if min_val is None or max_val is None:
+                return False
+            eps = 1e-6
+            return min_val < -eps or max_val > (1.0 + eps)
+
+        if _out_of_bounds(cn.participation_weight) or _out_of_bounds(
+            cn.projected_participation_weight
+        ):
+            self.scale_participation_weights = True
+            logging.warning(
+                "Auto-scaling participation weights because values exceed [0, 1]. "
+                "Set scale_participation_weights=True explicitly to silence this warning."
+            )
+
     def _ensure_player_off(self, player_id: str) -> PlayerRating:
         if player_id not in self._player_off_ratings:
             # create with start generator later; initialize to 0 now; overwritten when needed
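A standalone illustration of the bounds check that triggers auto-scaling (plain polars, not spforge code): a raw minutes column exceeds 1.0 and flips the flag, a share-of-game column does not.

    import polars as pl

    def out_of_bounds(col: pl.Series, eps: float = 1e-6) -> bool:
        min_val, max_val = col.min(), col.max()
        if min_val is None or max_val is None:  # all-null column
            return False
        return min_val < -eps or max_val > (1.0 + eps)

    df = pl.DataFrame({"minutes": [36.0, 12.0, 0.0], "share": [0.75, 0.25, 0.0]})
    print(out_of_bounds(df["minutes"]))  # True  -> scaling enabled, warning logged
    print(out_of_bounds(df["share"]))    # False -> weights used unchanged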
@@ -237,6 +276,7 @@ class PlayerRatingGenerator(RatingGenerator):
             self._projected_participation_weight_max = self._participation_weight_max

     def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Create internal scaled participation weight columns without mutating originals."""
         if not self.scale_participation_weights:
             return df
         if self._participation_weight_max is None or self._participation_weight_max <= 0:
@@ -251,7 +291,7 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.participation_weight)
+                .alias(_SCALED_PW)
             )

         if (
@@ -264,16 +304,38 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.projected_participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.projected_participation_weight)
+                .alias(_SCALED_PPW)
             )

         return df

+    def _get_participation_weight_col(self) -> str:
+        """Get the column name to use for participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.participation_weight:
+            return _SCALED_PW
+        return cn.participation_weight if cn else ""
+
+    def _get_projected_participation_weight_col(self) -> str:
+        """Get the column name to use for projected participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.projected_participation_weight:
+            return _SCALED_PPW
+        return cn.projected_participation_weight if cn else ""
+
+    def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Remove internal scaled columns before returning."""
+        cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
+        if cols_to_drop:
+            df = df.drop(cols_to_drop)
+        return df
+
     def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_ratings(match_df)

+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
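The scaling itself is a divide-and-clip into a new internal column, so the caller's original column is no longer mutated; the rename from .alias(cn.participation_weight) to .alias(_SCALED_PW) above is the whole fix. A minimal sketch with illustrative names:

    import polars as pl

    _SCALED_PW = "__scaled_participation_weight__"
    denom = 48.0  # max participation weight observed during fit

    df = pl.DataFrame({"minutes": [36.0, 48.0, 12.0]})
    df = df.with_columns(
        (pl.col("minutes") / denom).clip(0.0, 1.0).alias(_SCALED_PW)
    )
    print(df.columns)  # ['minutes', '__scaled_participation_weight__']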
@@ -293,13 +355,15 @@ class PlayerRatingGenerator(RatingGenerator):
             on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
         )

-        return self._add_rating_features(df)
+        result = self._add_rating_features(df)
+        return self._remove_internal_scaled_columns(result)

     def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_future_ratings(match_df)

+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
@@ -324,7 +388,8 @@ class PlayerRatingGenerator(RatingGenerator):
             how="left",
         )

-        return self._add_rating_features(df_with_ratings)
+        result = self._add_rating_features(df_with_ratings)
+        return self._remove_internal_scaled_columns(result)

     def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
         cn = self.column_names
@@ -378,9 +443,9 @@ class PlayerRatingGenerator(RatingGenerator):
         team1_off_rating, team1_def_rating = self._team_off_def_rating_from_collection(c1)
         team2_off_rating, team2_def_rating = self._team_off_def_rating_from_collection(c2)

-        player_updates: list[tuple[str, str, float, float, float, float, float, float, int]] = (
-            []
-        )
+        player_updates: list[
+            tuple[str, str, float, float, float, float, float, float, int, str | None]
+        ] = []

         for pre_player in c1.pre_match_player_ratings:
             pid = pre_player.id
@@ -456,6 +521,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )

@@ -533,6 +599,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )

@@ -547,6 +614,7 @@ class PlayerRatingGenerator(RatingGenerator):
             _off_change,
             _def_change,
             _dn,
+            _league,
         ) in player_updates:
             out[cn.player_id].append(pid)
             out[cn.match_id].append(match_id)
@@ -563,15 +631,18 @@ class PlayerRatingGenerator(RatingGenerator):
             for (
                 pid,
                 team_id,
-                _off_pre,
+                off_pre,
                 _def_pre,
                 _pred_off,
                 _pred_def,
                 off_change,
                 def_change,
                 dn,
+                league,
             ) in player_updates:
-                pending_team_updates.append((pid, team_id, off_change, def_change, dn))
+                pending_team_updates.append(
+                    (pid, team_id, off_pre, off_change, def_change, dn, league)
+                )

             if last_update_id is None:
                 last_update_id = update_id
@@ -581,9 +652,11 @@ class PlayerRatingGenerator(RatingGenerator):

         return pl.DataFrame(out, strict=False)

-    def _apply_player_updates(self, updates: list[tuple[str, str, float, float, int]]) -> None:
+    def _apply_player_updates(
+        self, updates: list[tuple[str, str, float, float, float, int, str | None]]
+    ) -> None:

-        for player_id, team_id, off_change, def_change, day_number in updates:
+        for player_id, team_id, pre_rating, off_change, def_change, day_number, league in updates:
             off_state = self._player_off_ratings[player_id]
             off_state.confidence_sum = self._calculate_post_match_confidence_sum(
                 entity_rating=off_state,
@@ -606,6 +679,19 @@ class PlayerRatingGenerator(RatingGenerator):
             def_state.last_match_day_number = int(day_number)
             def_state.most_recent_team_id = team_id

+            self.start_rating_generator.update_players_to_leagues(
+                PlayerRatingChange(
+                    id=player_id,
+                    day_number=day_number,
+                    league=league,
+                    participation_weight=1.0,
+                    predicted_performance=0.0,
+                    performance=0.0,
+                    pre_match_rating_value=pre_rating,
+                    rating_change_value=off_change,
+                )
+            )
+
     def _add_rating_features(self, df: pl.DataFrame) -> pl.DataFrame:
         cols_to_add = set((self._features_out or []) + (self.non_predictor_features_out or []))

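This diff does not show update_players_to_leagues itself, but the intent is visible: every applied rating change now carries the player's pre-match rating and league, so the start-rating generator can keep per-league rating levels current. A generic illustration of that bookkeeping (not spforge's implementation):

    from collections import defaultdict
    from dataclasses import dataclass

    @dataclass
    class RatingChange:
        league: str | None
        pre_match_rating_value: float
        rating_change_value: float

    league_post_ratings: dict[str, list[float]] = defaultdict(list)

    def record(change: RatingChange) -> None:
        if change.league is None:
            return
        # Track post-match rating values per league; a start-rating generator
        # can use their distribution to seed unseen players from that league.
        league_post_ratings[change.league].append(
            change.pre_match_rating_value + change.rating_change_value
        )

    record(RatingChange("NBA", 1000.0, 12.5))
    print(league_post_ratings["NBA"])  # [1012.5]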
611
697
 
@@ -618,6 +704,7 @@ class PlayerRatingGenerator(RatingGenerator):
             or self.OPP_RATING_PROJ_COL in cols_to_add
             or self.DIFF_PROJ_COL in cols_to_add
             or self.MEAN_PROJ_COL in cols_to_add
+            or self.PLAYER_DIFF_FROM_TEAM_PROJ_COL in cols_to_add
         ):
             df = add_team_rating_projected(
                 df=df,
@@ -673,6 +760,13 @@ class PlayerRatingGenerator(RatingGenerator):
             )
         )

+        if self.PLAYER_DIFF_FROM_TEAM_PROJ_COL in cols_to_add:
+            df = df.with_columns(
+                (pl.col(self.PLAYER_OFF_RATING_COL) - pl.col(self.TEAM_OFF_RATING_PROJ_COL)).alias(
+                    self.PLAYER_DIFF_FROM_TEAM_PROJ_COL
+                )
+            )
+
         if (
             self.TEAM_RATING_COL in cols_to_add
             or self.OPP_RATING_COL in cols_to_add
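A standalone sketch of the new projected-difference feature (illustrative column names; the real ones come from RatingKnownFeatures plus the optional output suffix):

    import polars as pl

    df = pl.DataFrame(
        {
            "player_off_rating": [1040.0, 960.0],
            "team_off_rating_projected": [1000.0, 1000.0],
        }
    )
    df = df.with_columns(
        (pl.col("player_off_rating") - pl.col("team_off_rating_projected")).alias(
            "player_rating_difference_from_team_projected"
        )
    )
    print(df["player_rating_difference_from_team_projected"].to_list())  # [40.0, -40.0]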
@@ -752,9 +846,13 @@ class PlayerRatingGenerator(RatingGenerator):

         if cn.participation_weight and cn.participation_weight in df.columns:
             player_stat_cols.append(cn.participation_weight)
+        if _SCALED_PW in df.columns:
+            player_stat_cols.append(_SCALED_PW)

         if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
             player_stat_cols.append(cn.projected_participation_weight)
+        if _SCALED_PPW in df.columns:
+            player_stat_cols.append(_SCALED_PPW)

         if cn.position and cn.position in df.columns:
             player_stat_cols.append(cn.position)
@@ -810,14 +908,23 @@ class PlayerRatingGenerator(RatingGenerator):
             position = team_player.get(cn.position)
             player_league = team_player.get(cn.league, None)

-            participation_weight = (
-                team_player.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
-            )
-            projected_participation_weight = (
-                team_player.get(cn.projected_participation_weight, participation_weight)
-                if cn.projected_participation_weight
-                else participation_weight
-            )
+            # Use scaled participation weight if available, otherwise use original
+            if _SCALED_PW in team_player:
+                participation_weight = team_player.get(_SCALED_PW, 1.0)
+            elif cn.participation_weight:
+                participation_weight = team_player.get(cn.participation_weight, 1.0)
+            else:
+                participation_weight = 1.0
+
+            # Use scaled projected participation weight if available, otherwise use original
+            if _SCALED_PPW in team_player:
+                projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
+            elif cn.projected_participation_weight:
+                projected_participation_weight = team_player.get(
+                    cn.projected_participation_weight, participation_weight
+                )
+            else:
+                projected_participation_weight = participation_weight
             projected_participation_weights.append(projected_participation_weight)

             perf_val = (
@@ -1043,14 +1150,21 @@ class PlayerRatingGenerator(RatingGenerator):
                     position = tp.get(cn.position)
                     league = tp.get(cn.league, None)

-                    pw = (
-                        tp.get(cn.participation_weight, 1.0) if cn.participation_weight else 1.0
-                    )
-                    ppw = (
-                        tp.get(cn.projected_participation_weight, pw)
-                        if cn.projected_participation_weight
-                        else pw
-                    )
+                    # Use scaled participation weight if available, otherwise use original
+                    if _SCALED_PW in tp:
+                        pw = tp.get(_SCALED_PW, 1.0)
+                    elif cn.participation_weight:
+                        pw = tp.get(cn.participation_weight, 1.0)
+                    else:
+                        pw = 1.0
+
+                    # Use scaled projected participation weight if available, otherwise use original
+                    if _SCALED_PPW in tp:
+                        ppw = tp.get(_SCALED_PPW, pw)
+                    elif cn.projected_participation_weight:
+                        ppw = tp.get(cn.projected_participation_weight, pw)
+                    else:
+                        ppw = pw
                     proj_w.append(float(ppw))

                     mp = MatchPerformance(
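Both rewritten sites follow the same precedence: the internal scaled column first, then the configured column, then a neutral default. A compact sketch of that fallback with plain dicts (not spforge code):

    _SCALED_PW = "__scaled_participation_weight__"

    def resolve_weight(row: dict, configured_col: str | None) -> float:
        if _SCALED_PW in row:
            return row[_SCALED_PW]
        if configured_col:
            return row.get(configured_col, 1.0)
        return 1.0

    print(resolve_weight({"minutes": 36.0, _SCALED_PW: 0.75}, "minutes"))  # 0.75
    print(resolve_weight({"minutes": 36.0}, "minutes"))                    # 36.0
    print(resolve_weight({}, None))                                        # 1.0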