spforge 0.8.4__py3-none-any.whl → 0.8.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of spforge might be problematic.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/autopipeline.py +169 -5
- spforge/estimator/_group_by_estimator.py +11 -3
- spforge/features_generator_pipeline.py +8 -4
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/performance_transformers/_performance_manager.py +2 -4
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +142 -28
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/ratings/utils.py +16 -6
- spforge/scorer/_score.py +42 -11
- spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/METADATA +12 -19
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/RECORD +37 -31
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/WHEEL +1 -1
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/performance_transformers/test_performance_manager.py +15 -0
- tests/ratings/test_player_rating_generator.py +154 -0
- tests/ratings/test_player_rating_no_mutation.py +214 -0
- tests/ratings/test_utils_scaled_weights.py +136 -0
- tests/scorer/test_score.py +232 -0
- tests/test_autopipeline.py +336 -6
- tests/test_feature_generator_pipeline.py +43 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/top_level.txt +0 -0
spforge/hyperparameter_tuning/_tuner.py CHANGED

```diff
@@ -45,6 +45,8 @@ class ParamSpec:
         elif self.param_type == "int":
             if self.low is None or self.high is None:
                 raise ValueError(f"int parameter '{name}' requires low and high bounds")
+            if self.step is None:
+                return trial.suggest_int(name, int(self.low), int(self.high))
             return trial.suggest_int(name, int(self.low), int(self.high), step=self.step)
         elif self.param_type == "categorical":
             if self.choices is None:
```
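The new guard matters because Optuna's `trial.suggest_int` defaults its `step` argument to 1 and expects an integer, so an unset `ParamSpec.step` should not be forwarded. A minimal sketch of the two call shapes, assuming only Optuna's documented API (the parameter name and bounds are illustrative):

```python
# Minimal sketch of the step guard above; "n_estimators" and its bounds are
# illustrative, not taken from the package.
import optuna

step = None  # e.g. ParamSpec.step was never configured


def objective(trial: optuna.Trial) -> float:
    if step is None:
        # Omit step so Optuna falls back to its default of 1.
        n = trial.suggest_int("n_estimators", 50, 500)
    else:
        n = trial.suggest_int("n_estimators", 50, 500, step=step)
    return float(n)


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=3)
```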
```diff
@@ -272,3 +274,193 @@ class RatingHyperparameterTuner:
                 raise ValueError("Scorer returned invalid values in dict")
             return float(np.mean(values))
         return float(score)
+
+
+def _is_estimator(obj: object) -> bool:
+    return hasattr(obj, "get_params") and hasattr(obj, "set_params")
+
+
+def _get_leaf_estimator_paths(estimator: Any) -> dict[str, Any]:
+    if not _is_estimator(estimator):
+        raise ValueError("estimator must implement get_params and set_params")
+
+    params = estimator.get_params(deep=True)
+    estimator_keys = [k for k, v in params.items() if _is_estimator(v)]
+
+    if not estimator_keys:
+        return {"": estimator}
+
+    leaves: list[str] = []
+    for key in estimator_keys:
+        if not any(other != key and other.startswith(f"{key}__") for other in estimator_keys):
+            leaves.append(key)
+
+    return {key: params[key] for key in sorted(leaves)}
+
+
+def _build_search_space_for_targets(
+    targets: dict[str, dict[str, ParamSpec]],
+) -> dict[str, ParamSpec]:
+    search_space: dict[str, ParamSpec] = {}
+    for path, params in targets.items():
+        for param_name, param_spec in params.items():
+            full_name = f"{path}__{param_name}" if path else param_name
+            if full_name in search_space:
+                raise ValueError(f"Duplicate parameter name detected: {full_name}")
+            search_space[full_name] = param_spec
+    return search_space
+
+
+def _enqueue_predicted_r_weight_zero(study: optuna.Study, search_space: dict[str, ParamSpec]):
+    zero_params: dict[str, float] = {}
+    for name, spec in search_space.items():
+        if not name.endswith("predicted_r_weight"):
+            continue
+        if spec.param_type not in {"float", "int"}:
+            continue
+        if spec.low is None or spec.high is None:
+            continue
+        if spec.low <= 0 <= spec.high:
+            zero_params[name] = 0.0
+
+    if zero_params:
+        study.enqueue_trial(zero_params)
+
+
+class EstimatorHyperparameterTuner:
+    """
+    Hyperparameter tuner for sklearn-compatible estimators.
+
+    Supports nested estimators and can target deepest leaf estimators.
+    """
+
+    def __init__(
+        self,
+        estimator: Any,
+        cross_validator: MatchKFoldCrossValidator,
+        scorer: BaseScorer,
+        direction: Literal["minimize", "maximize"],
+        param_search_space: dict[str, ParamSpec] | None = None,
+        param_targets: dict[str, dict[str, ParamSpec]] | None = None,
+        n_trials: int = 50,
+        n_jobs: int = 1,
+        storage: str | None = None,
+        study_name: str | None = None,
+        timeout: float | None = None,
+        show_progress_bar: bool = True,
+        sampler: optuna.samplers.BaseSampler | None = None,
+        pruner: optuna.pruners.BasePruner | None = None,
+    ):
+        self.estimator = estimator
+        self.cross_validator = cross_validator
+        self.scorer = scorer
+        self.direction = direction
+        self.param_search_space = param_search_space
+        self.param_targets = param_targets
+        self.n_trials = n_trials
+        self.n_jobs = n_jobs
+        self.storage = storage
+        self.study_name = study_name
+        self.timeout = timeout
+        self.show_progress_bar = show_progress_bar
+        self.sampler = sampler
+        self.pruner = pruner
+
+        if direction not in ["minimize", "maximize"]:
+            raise ValueError(f"direction must be 'minimize' or 'maximize', got: {direction}")
+
+        if storage is not None and study_name is None:
+            raise ValueError("study_name is required when using storage")
+
+        if param_search_space is not None and param_targets is not None:
+            raise ValueError("param_search_space and param_targets cannot both be provided")
+
+    def optimize(self, df: IntoFrameT) -> OptunaResult:
+        from spforge.hyperparameter_tuning._default_search_spaces import (
+            get_default_estimator_search_space,
+        )
+
+        leaf_estimators = _get_leaf_estimator_paths(self.estimator)
+        default_targets = {
+            path: get_default_estimator_search_space(est)
+            for path, est in leaf_estimators.items()
+        }
+        default_targets = {path: space for path, space in default_targets.items() if space}
+
+        if self.param_targets is not None:
+            unknown = set(self.param_targets) - set(leaf_estimators)
+            if unknown:
+                raise ValueError(f"param_targets contains unknown estimator paths: {unknown}")
+            targets = self.param_targets
+        elif self.param_search_space is not None:
+            targets = {path: self.param_search_space for path in leaf_estimators}
+        elif default_targets:
+            targets = default_targets
+        else:
+            raise ValueError(
+                "param_search_space is required when no default search space is available"
+            )
+
+        search_space = _build_search_space_for_targets(targets)
+        if not search_space:
+            raise ValueError("Resolved search space is empty")
+
+        study = optuna.create_study(
+            direction=self.direction,
+            sampler=self.sampler,
+            pruner=self.pruner,
+            storage=self.storage,
+            study_name=self.study_name,
+            load_if_exists=True if self.storage else False,
+        )
+
+        _enqueue_predicted_r_weight_zero(study, search_space)
+
+        study.optimize(
+            lambda trial: self._objective(trial, df, search_space),
+            n_trials=self.n_trials,
+            n_jobs=self.n_jobs,
+            timeout=self.timeout,
+            show_progress_bar=self.show_progress_bar,
+        )
+
+        return OptunaResult(
+            best_params=study.best_params,
+            best_value=study.best_value,
+            best_trial=study.best_trial,
+            study=study,
+        )
+
+    def _objective(
+        self, trial: optuna.Trial, df: IntoFrameT, search_space: dict[str, ParamSpec]
+    ) -> float:
+        try:
+            trial_params = self._suggest_params(trial, search_space)
+
+            copied_estimator = copy.deepcopy(self.estimator)
+            copied_estimator.set_params(**trial_params)
+
+            cv = copy.deepcopy(self.cross_validator)
+            cv.estimator = copied_estimator
+
+            validation_df = cv.generate_validation_df(df)
+            score = self.scorer.score(validation_df)
+            score_value = RatingHyperparameterTuner._aggregate_score(score)
+
+            if math.isnan(score_value) or math.isinf(score_value):
+                logger.warning(f"Trial {trial.number} returned invalid score: {score_value}")
+                return float("inf") if self.direction == "minimize" else float("-inf")
+
+            return score_value
+
+        except Exception as e:
+            logger.warning(f"Trial {trial.number} failed with error: {e}")
+            return float("inf") if self.direction == "minimize" else float("-inf")
+
+    def _suggest_params(
+        self, trial: optuna.Trial, search_space: dict[str, ParamSpec]
+    ) -> dict[str, Any]:
+        params: dict[str, Any] = {}
+        for param_name, param_spec in search_space.items():
+            params[param_name] = param_spec.suggest(trial, param_name)
+        return params
```
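`_get_leaf_estimator_paths` walks `get_params(deep=True)` and keeps only the deepest nested estimators, so tuning targets the models that actually fit. A standalone sketch of that behaviour, with the two helpers copied from the hunk; the scikit-learn pipeline is purely illustrative and not part of the diff:

```python
# Standalone sketch: leaf-estimator discovery over a nested sklearn object.
from typing import Any

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def _is_estimator(obj: object) -> bool:
    return hasattr(obj, "get_params") and hasattr(obj, "set_params")


def _get_leaf_estimator_paths(estimator: Any) -> dict[str, Any]:
    params = estimator.get_params(deep=True)
    estimator_keys = [k for k, v in params.items() if _is_estimator(v)]
    if not estimator_keys:
        return {"": estimator}
    leaves = [
        key
        for key in estimator_keys
        if not any(o != key and o.startswith(f"{key}__") for o in estimator_keys)
    ]
    return {key: params[key] for key in sorted(leaves)}


pipe = Pipeline([("scaler", StandardScaler()), ("model", LogisticRegression())])
print(_get_leaf_estimator_paths(pipe).keys())
# dict_keys(['model', 'scaler'])
```

The leaf paths then become `set_params`-style prefixes via `_build_search_space_for_targets`: a spec named `C` under the `model` leaf is suggested and applied as `model__C`.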
spforge/performance_transformers/_performance_manager.py CHANGED

```diff
@@ -250,8 +250,6 @@ class PerformanceWeightsManager(PerformanceManager):
             )
         )
 
-        sum_weight = sum([w.weight for w in self.weights])
-
         for column_weight in self.weights:
             weight_col = f"weight__{column_weight.name}"
             feature_col = column_weight.name
@@ -261,14 +259,14 @@ class PerformanceWeightsManager(PerformanceManager):
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * (1 - nw.col(feature_name)))
+                        + (nw.col(weight_col) * (1 - nw.col(feature_name)))
                     ).alias(tmp_out_performance_colum_name)
                 )
             else:
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col) / sum_weight * nw.col(feature_name))
+                        + (nw.col(weight_col) * nw.col(feature_name))
                     ).alias(tmp_out_performance_colum_name)
                 )
 
```
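The two hunks drop the normalisation by the summed column weights: each weighted feature now contributes `weight * value` instead of `weight / sum_weight * value`. A small numeric sketch of the difference, with made-up weights and values:

```python
# Hypothetical column weights and feature values, to compare old vs new math.
weights = {"points": 0.6, "assists": 0.4}
features = {"points": 0.8, "assists": 0.5}

old = sum(w / sum(weights.values()) * features[n] for n, w in weights.items())
new = sum(w * features[n] for n, w in weights.items())
print(old, new)  # 0.68 0.68 -- identical because the weights sum to 1.0
```

When the configured weights already sum to 1, as here, the output is unchanged; otherwise the new behaviour scales the combined performance by the weight sum.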
spforge/ratings/__init__.py CHANGED

```diff
@@ -6,3 +6,7 @@ from .enums import (
     RatingUnknownFeatures as RatingUnknownFeatures,
 )
 from .league_identifier import LeagueIdentifier as LeagueIdentifier
+from .league_start_rating_optimizer import (
+    LeagueStartRatingOptimizationResult as LeagueStartRatingOptimizationResult,
+    LeagueStartRatingOptimizer as LeagueStartRatingOptimizer,
+)
```
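The `name as name` form is the explicit re-export idiom recognised by type checkers, so the new optimizer becomes part of the package's public surface:

```python
# Both new symbols are importable from the package root after this change.
from spforge.ratings import (
    LeagueStartRatingOptimizationResult,
    LeagueStartRatingOptimizer,
)
```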
spforge/ratings/_player_rating.py CHANGED

```diff
@@ -3,6 +3,7 @@ from __future__ import annotations
 
 import copy
 import math
+import logging
 from typing import Any, Literal
 
 import narwhals.stable.v2 as nw
@@ -15,6 +16,7 @@ from spforge.data_structures import (
     MatchPerformance,
     MatchPlayer,
     PlayerRating,
+    PlayerRatingChange,
     PlayerRatingsResult,
     PreMatchPlayerRating,
     PreMatchPlayersCollection,
@@ -33,6 +35,8 @@ from spforge.ratings.utils import (
 from spforge.feature_generator._utils import to_polars
 
 PLAYER_STATS = "__PLAYER_STATS"
+_SCALED_PW = "__scaled_participation_weight__"
+_SCALED_PPW = "__scaled_projected_participation_weight__"
 
 
 class PlayerRatingGenerator(RatingGenerator):
@@ -75,12 +79,13 @@ class PlayerRatingGenerator(RatingGenerator):
         start_min_count_for_percentiles: int = 50,
         start_team_rating_subtract: float = 80,
         start_team_weight: float = 0,
-        start_max_days_ago_league_entities: int =
+        start_max_days_ago_league_entities: int = 600,
         start_min_match_count_team_rating: int = 2,
         start_harcoded_start_rating: float | None = None,
         column_names: ColumnNames | None = None,
         output_suffix: str | None = None,
         scale_participation_weights: bool = False,
+        auto_scale_participation_weights: bool = True,
         **kwargs: Any,
     ):
         super().__init__(
@@ -129,6 +134,9 @@ class PlayerRatingGenerator(RatingGenerator):
             str(RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_PROJECTED)
         )
         self.MEAN_PROJ_COL = self._suffix(str(RatingKnownFeatures.RATING_MEAN_PROJECTED))
+        self.PLAYER_DIFF_FROM_TEAM_PROJ_COL = self._suffix(
+            str(RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED)
+        )
 
         self.TEAM_OFF_RATING_PROJ_COL = self._suffix(
             str(RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED)
@@ -161,6 +169,7 @@ class PlayerRatingGenerator(RatingGenerator):
 
         self.use_off_def_split = bool(use_off_def_split)
         self.scale_participation_weights = bool(scale_participation_weights)
+        self.auto_scale_participation_weights = bool(auto_scale_participation_weights)
         self._participation_weight_max: float | None = None
         self._projected_participation_weight_max: float | None = None
 
@@ -186,9 +195,39 @@ class PlayerRatingGenerator(RatingGenerator):
         column_names: ColumnNames | None = None,
     ) -> DataFrame | IntoFrameT:
         self.column_names = column_names if column_names else self.column_names
+        self._maybe_enable_participation_weight_scaling(df)
         self._set_participation_weight_max(df)
         return super().fit_transform(df, column_names)
 
+    def _maybe_enable_participation_weight_scaling(self, df: DataFrame) -> None:
+        if self.scale_participation_weights or not self.auto_scale_participation_weights:
+            return
+        cn = self.column_names
+        if not cn:
+            return
+
+        pl_df = df.to_native() if df.implementation.is_polars() else df.to_polars().to_native()
+
+        def _out_of_bounds(col_name: str | None) -> bool:
+            if not col_name or col_name not in df.columns:
+                return False
+            col = pl_df[col_name]
+            min_val = col.min()
+            max_val = col.max()
+            if min_val is None or max_val is None:
+                return False
+            eps = 1e-6
+            return min_val < -eps or max_val > (1.0 + eps)
+
+        if _out_of_bounds(cn.participation_weight) or _out_of_bounds(
+            cn.projected_participation_weight
+        ):
+            self.scale_participation_weights = True
+            logging.warning(
+                "Auto-scaling participation weights because values exceed [0, 1]. "
+                "Set scale_participation_weights=True explicitly to silence this warning."
+            )
+
     def _ensure_player_off(self, player_id: str) -> PlayerRating:
         if player_id not in self._player_off_ratings:
             # create with start generator later; initialize to 0 now; overwritten when needed
```
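The new method only inspects the data: when either participation-weight column falls outside [0, 1] beyond a 1e-6 tolerance, it flips `scale_participation_weights` on and logs a warning. A minimal sketch of that bounds check, assuming a polars frame whose weight column holds raw minutes rather than a 0-1 share:

```python
# Minimal sketch of the out-of-bounds test that triggers auto-scaling.
# The column name and values are illustrative.
import polars as pl

df = pl.DataFrame({"participation_weight": [36.0, 12.0, 0.0]})  # raw minutes

col = df["participation_weight"]
eps = 1e-6
out_of_bounds = col.min() < -eps or col.max() > 1.0 + eps
print(out_of_bounds)  # True -> the generator enables scaling and warns
```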
```diff
@@ -237,6 +276,7 @@ class PlayerRatingGenerator(RatingGenerator):
             self._projected_participation_weight_max = self._participation_weight_max
 
     def _scale_participation_weight_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Create internal scaled participation weight columns without mutating originals."""
         if not self.scale_participation_weights:
             return df
         if self._participation_weight_max is None or self._participation_weight_max <= 0:
@@ -251,7 +291,7 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.participation_weight)
+                .alias(_SCALED_PW)
             )
 
         if (
@@ -264,16 +304,38 @@ class PlayerRatingGenerator(RatingGenerator):
             df = df.with_columns(
                 (pl.col(cn.projected_participation_weight) / denom)
                 .clip(0.0, 1.0)
-                .alias(cn.projected_participation_weight)
+                .alias(_SCALED_PPW)
             )
 
         return df
 
+    def _get_participation_weight_col(self) -> str:
+        """Get the column name to use for participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.participation_weight:
+            return _SCALED_PW
+        return cn.participation_weight if cn else ""
+
+    def _get_projected_participation_weight_col(self) -> str:
+        """Get the column name to use for projected participation weight (scaled if available)."""
+        cn = self.column_names
+        if self.scale_participation_weights and cn and cn.projected_participation_weight:
+            return _SCALED_PPW
+        return cn.projected_participation_weight if cn else ""
+
+    def _remove_internal_scaled_columns(self, df: pl.DataFrame) -> pl.DataFrame:
+        """Remove internal scaled columns before returning."""
+        cols_to_drop = [c for c in [_SCALED_PW, _SCALED_PPW] if c in df.columns]
+        if cols_to_drop:
+            df = df.drop(cols_to_drop)
+        return df
+
     def _historical_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_ratings(match_df)
 
+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
```
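Together with `_remove_internal_scaled_columns`, this keeps the caller's columns untouched: scaled values live in the internal `_SCALED_PW`/`_SCALED_PPW` columns and are dropped before a frame is returned, which is the behaviour the new `test_player_rating_no_mutation.py` suite appears to pin down. A minimal sketch of the pattern:

```python
# Minimal sketch of the scale-internally-then-drop pattern; the column name,
# data, and max-based denominator are illustrative.
import polars as pl

_SCALED_PW = "__scaled_participation_weight__"

df = pl.DataFrame({"participation_weight": [48.0, 24.0, 12.0]})
denom = df["participation_weight"].max()

df = df.with_columns(
    (pl.col("participation_weight") / denom).clip(0.0, 1.0).alias(_SCALED_PW)
)
# ... rating calculations read _SCALED_PW instead of the original column ...
df = df.drop([c for c in [_SCALED_PW] if c in df.columns])
print(df.columns)  # ['participation_weight'] -- the input column is unmodified
```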
```diff
@@ -293,13 +355,15 @@ class PlayerRatingGenerator(RatingGenerator):
             on=[self.column_names.player_id, self.column_names.match_id, self.column_names.team_id],
         )
 
-        return self._add_rating_features(df)
+        result = self._add_rating_features(df)
+        return self._remove_internal_scaled_columns(result)
 
     def _future_transform(self, df: pl.DataFrame) -> pl.DataFrame:
         df = self._scale_participation_weight_columns(df)
         match_df = self._create_match_df(df)
         ratings = self._calculate_future_ratings(match_df)
 
+        # Keep scaled columns for now - they're needed by _add_rating_features
         cols = [
             c
             for c in df.columns
@@ -324,7 +388,8 @@ class PlayerRatingGenerator(RatingGenerator):
             how="left",
         )
 
-        return self._add_rating_features(df_with_ratings)
+        result = self._add_rating_features(df_with_ratings)
+        return self._remove_internal_scaled_columns(result)
 
     def _calculate_ratings(self, match_df: pl.DataFrame) -> pl.DataFrame:
         cn = self.column_names
@@ -378,9 +443,9 @@ class PlayerRatingGenerator(RatingGenerator):
         team1_off_rating, team1_def_rating = self._team_off_def_rating_from_collection(c1)
         team2_off_rating, team2_def_rating = self._team_off_def_rating_from_collection(c2)
 
-        player_updates: list[
-            tuple[str, str, float, float, float, float, float, float, int]
-        ] = []
+        player_updates: list[
+            tuple[str, str, float, float, float, float, float, float, int, str | None]
+        ] = []
 
         for pre_player in c1.pre_match_player_ratings:
             pid = pre_player.id
@@ -456,6 +521,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )
 
@@ -533,6 +599,7 @@ class PlayerRatingGenerator(RatingGenerator):
                     float(off_change),
                     float(def_change),
                     day_number,
+                    pre_player.league,
                 )
             )
 
@@ -547,6 +614,7 @@ class PlayerRatingGenerator(RatingGenerator):
             _off_change,
             _def_change,
             _dn,
+            _league,
         ) in player_updates:
             out[cn.player_id].append(pid)
             out[cn.match_id].append(match_id)
@@ -563,15 +631,18 @@ class PlayerRatingGenerator(RatingGenerator):
         for (
             pid,
             team_id,
-            _off_pre,
+            off_pre,
             _def_pre,
             _pred_off,
             _pred_def,
             off_change,
             def_change,
             dn,
+            league,
         ) in player_updates:
-            pending_team_updates.append((pid, team_id, off_change, def_change, dn))
+            pending_team_updates.append(
+                (pid, team_id, off_pre, off_change, def_change, dn, league)
+            )
 
         if last_update_id is None:
             last_update_id = update_id
@@ -581,9 +652,11 @@ class PlayerRatingGenerator(RatingGenerator):
 
         return pl.DataFrame(out, strict=False)
 
-    def _apply_player_updates(self, updates: list[tuple[str, str, float, float, int]]) -> None:
+    def _apply_player_updates(
+        self, updates: list[tuple[str, str, float, float, float, int, str | None]]
+    ) -> None:
 
-        for player_id, team_id, off_change, def_change, day_number in updates:
+        for player_id, team_id, pre_rating, off_change, def_change, day_number, league in updates:
             off_state = self._player_off_ratings[player_id]
             off_state.confidence_sum = self._calculate_post_match_confidence_sum(
                 entity_rating=off_state,
@@ -606,6 +679,19 @@ class PlayerRatingGenerator(RatingGenerator):
             def_state.last_match_day_number = int(day_number)
             def_state.most_recent_team_id = team_id
 
+            self.start_rating_generator.update_players_to_leagues(
+                PlayerRatingChange(
+                    id=player_id,
+                    day_number=day_number,
+                    league=league,
+                    participation_weight=1.0,
+                    predicted_performance=0.0,
+                    performance=0.0,
+                    pre_match_rating_value=pre_rating,
+                    rating_change_value=off_change,
+                )
+            )
+
     def _add_rating_features(self, df: pl.DataFrame) -> pl.DataFrame:
         cols_to_add = set((self._features_out or []) + (self.non_predictor_features_out or []))
 
```
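Each applied update now reports a `PlayerRatingChange` back to the start-rating generator, so league membership and rating movement feed into future start ratings. A sketch of the record being produced; the field names come from the hunk, the values are illustrative, and the neutral 1.0/0.0 entries mirror the constants used above:

```python
# Illustrative PlayerRatingChange, mirroring the per-update bookkeeping above.
from spforge.data_structures import PlayerRatingChange

change = PlayerRatingChange(
    id="player-1",
    day_number=120,
    league="NBA",
    participation_weight=1.0,   # fixed to 1.0 in the hunk
    predicted_performance=0.0,  # fixed to 0.0 in the hunk
    performance=0.0,
    pre_match_rating_value=1015.3,
    rating_change_value=4.2,
)
```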
```diff
@@ -618,6 +704,7 @@ class PlayerRatingGenerator(RatingGenerator):
             or self.OPP_RATING_PROJ_COL in cols_to_add
             or self.DIFF_PROJ_COL in cols_to_add
             or self.MEAN_PROJ_COL in cols_to_add
+            or self.PLAYER_DIFF_FROM_TEAM_PROJ_COL in cols_to_add
         ):
             df = add_team_rating_projected(
                 df=df,
@@ -673,6 +760,13 @@ class PlayerRatingGenerator(RatingGenerator):
             )
         )
 
+        if self.PLAYER_DIFF_FROM_TEAM_PROJ_COL in cols_to_add:
+            df = df.with_columns(
+                (pl.col(self.PLAYER_OFF_RATING_COL) - pl.col(self.TEAM_OFF_RATING_PROJ_COL)).alias(
+                    self.PLAYER_DIFF_FROM_TEAM_PROJ_COL
+                )
+            )
+
         if (
             self.TEAM_RATING_COL in cols_to_add
             or self.OPP_RATING_COL in cols_to_add
```
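The new projected-difference feature is a plain subtraction: the player's offensive rating minus the team's projected offensive rating. In isolation, with illustrative stand-in names for the suffixed rating columns:

```python
# Illustrative computation of the new feature; real column names carry the
# configured output suffix.
import polars as pl

df = pl.DataFrame(
    {
        "player_off_rating": [1020.0, 980.0],
        "team_off_rating_projected": [1000.0, 1000.0],
    }
)
df = df.with_columns(
    (pl.col("player_off_rating") - pl.col("team_off_rating_projected")).alias(
        "player_rating_difference_from_team_projected"
    )
)
print(df["player_rating_difference_from_team_projected"].to_list())  # [20.0, -20.0]
```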
```diff
@@ -752,9 +846,13 @@ class PlayerRatingGenerator(RatingGenerator):
 
         if cn.participation_weight and cn.participation_weight in df.columns:
             player_stat_cols.append(cn.participation_weight)
+        if _SCALED_PW in df.columns:
+            player_stat_cols.append(_SCALED_PW)
 
         if cn.projected_participation_weight and cn.projected_participation_weight in df.columns:
             player_stat_cols.append(cn.projected_participation_weight)
+        if _SCALED_PPW in df.columns:
+            player_stat_cols.append(_SCALED_PPW)
 
         if cn.position and cn.position in df.columns:
             player_stat_cols.append(cn.position)
@@ -810,14 +908,23 @@ class PlayerRatingGenerator(RatingGenerator):
             position = team_player.get(cn.position)
             player_league = team_player.get(cn.league, None)
 
-            team_player.get(cn.
+            # Use scaled participation weight if available, otherwise use original
+            if _SCALED_PW in team_player:
+                participation_weight = team_player.get(_SCALED_PW, 1.0)
+            elif cn.participation_weight:
+                participation_weight = team_player.get(cn.participation_weight, 1.0)
+            else:
+                participation_weight = 1.0
+
+            # Use scaled projected participation weight if available, otherwise use original
+            if _SCALED_PPW in team_player:
+                projected_participation_weight = team_player.get(_SCALED_PPW, participation_weight)
+            elif cn.projected_participation_weight:
+                projected_participation_weight = team_player.get(
+                    cn.projected_participation_weight, participation_weight
+                )
+            else:
+                projected_participation_weight = participation_weight
             projected_participation_weights.append(projected_participation_weight)
 
             perf_val = (
@@ -1043,14 +1150,21 @@ class PlayerRatingGenerator(RatingGenerator):
             position = tp.get(cn.position)
             league = tp.get(cn.league, None)
 
-            tp.get(cn.
+            # Use scaled participation weight if available, otherwise use original
+            if _SCALED_PW in tp:
+                pw = tp.get(_SCALED_PW, 1.0)
+            elif cn.participation_weight:
+                pw = tp.get(cn.participation_weight, 1.0)
+            else:
+                pw = 1.0
+
+            # Use scaled projected participation weight if available, otherwise use original
+            if _SCALED_PPW in tp:
+                ppw = tp.get(_SCALED_PPW, pw)
+            elif cn.projected_participation_weight:
+                ppw = tp.get(cn.projected_participation_weight, pw)
+            else:
+                ppw = pw
             proj_w.append(float(ppw))
 
             mp = MatchPerformance(
```
|