spforge 0.8.2__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/features_generator_pipeline.py +8 -4
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +11 -0
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- spforge/scorer/_score.py +38 -3
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/METADATA +12 -19
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/RECORD +26 -22
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/ratings/test_player_rating_generator.py +27 -0
- tests/scorer/test_score.py +343 -0
- tests/test_feature_generator_pipeline.py +43 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/WHEEL +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/top_level.txt +0 -0
spforge/scorer/_score.py
CHANGED
|
@@ -350,6 +350,7 @@ class PWMSE(BaseScorer):
|
|
|
350
350
|
labels: list[int] | None = None,
|
|
351
351
|
compare_to_naive: bool = False,
|
|
352
352
|
naive_granularity: list[str] | None = None,
|
|
353
|
+
evaluation_labels: list[int] | None = None,
|
|
353
354
|
):
|
|
354
355
|
self.pred_column_name = pred_column
|
|
355
356
|
super().__init__(
|
|
@@ -363,12 +364,39 @@ class PWMSE(BaseScorer):
|
|
|
363
364
|
naive_granularity=naive_granularity,
|
|
364
365
|
)
|
|
365
366
|
self.labels = labels
|
|
367
|
+
self.evaluation_labels = evaluation_labels
|
|
368
|
+
|
|
369
|
+
self._eval_indices: list[int] | None = None
|
|
370
|
+
if self.evaluation_labels is not None and self.labels is not None:
|
|
371
|
+
label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
|
|
372
|
+
self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
|
|
373
|
+
|
|
374
|
+
def _slice_and_renormalize(self, preds: np.ndarray) -> np.ndarray:
|
|
375
|
+
if self._eval_indices is None:
|
|
376
|
+
return preds
|
|
377
|
+
sliced = preds[:, self._eval_indices]
|
|
378
|
+
row_sums = sliced.sum(axis=1, keepdims=True)
|
|
379
|
+
row_sums = np.where(row_sums == 0, 1.0, row_sums)
|
|
380
|
+
return sliced / row_sums
|
|
381
|
+
|
|
382
|
+
def _get_scoring_labels(self) -> list[int]:
|
|
383
|
+
if self.evaluation_labels is not None:
|
|
384
|
+
return self.evaluation_labels
|
|
385
|
+
return self.labels
|
|
366
386
|
|
|
367
387
|
def _pwmse_score(self, targets: np.ndarray, preds: np.ndarray) -> float:
|
|
368
|
-
labels = np.asarray(self.
|
|
388
|
+
labels = np.asarray(self._get_scoring_labels(), dtype=np.float64)
|
|
369
389
|
diffs_sqd = (labels[None, :] - targets[:, None]) ** 2
|
|
370
390
|
return float((diffs_sqd * preds).sum(axis=1).mean())
|
|
371
391
|
|
|
392
|
+
def _filter_targets_for_evaluation(self, df: IntoFrameT) -> IntoFrameT:
|
|
393
|
+
if self.evaluation_labels is None:
|
|
394
|
+
return df
|
|
395
|
+
eval_set = set(self.evaluation_labels)
|
|
396
|
+
min_eval, max_eval = min(eval_set), max(eval_set)
|
|
397
|
+
target_col = nw.col(self.target)
|
|
398
|
+
return df.filter((target_col >= min_eval) & (target_col <= max_eval))
|
|
399
|
+
|
|
372
400
|
@narwhals.narwhalify
|
|
373
401
|
def score(self, df: IntoFrameT) -> float | dict[tuple, float]:
|
|
374
402
|
df = apply_filters(df, self.filters)
|
|
@@ -386,6 +414,9 @@ class PWMSE(BaseScorer):
|
|
|
386
414
|
after,
|
|
387
415
|
)
|
|
388
416
|
|
|
417
|
+
# Filter targets outside evaluation_labels range
|
|
418
|
+
df = self._filter_targets_for_evaluation(df)
|
|
419
|
+
|
|
389
420
|
if self.aggregation_level:
|
|
390
421
|
first_pred = df[self.pred_column].to_list()[0] if len(df) > 0 else None
|
|
391
422
|
if isinstance(first_pred, (list, np.ndarray)):
|
|
@@ -415,12 +446,13 @@ class PWMSE(BaseScorer):
|
|
|
415
446
|
|
|
416
447
|
targets = gran_df[self.target].to_numpy().astype(np.float64)
|
|
417
448
|
preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
|
|
449
|
+
preds = self._slice_and_renormalize(preds)
|
|
418
450
|
score = self._pwmse_score(targets, preds)
|
|
419
451
|
if self.compare_to_naive:
|
|
420
452
|
naive_probs_list = _naive_probability_predictions_for_df(
|
|
421
453
|
gran_df,
|
|
422
454
|
self.target,
|
|
423
|
-
list(self.
|
|
455
|
+
list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
|
|
424
456
|
self.naive_granularity,
|
|
425
457
|
)
|
|
426
458
|
naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
|
|
@@ -432,12 +464,13 @@ class PWMSE(BaseScorer):
|
|
|
432
464
|
|
|
433
465
|
targets = df[self.target].to_numpy().astype(np.float64)
|
|
434
466
|
preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
|
|
467
|
+
preds = self._slice_and_renormalize(preds)
|
|
435
468
|
score = self._pwmse_score(targets, preds)
|
|
436
469
|
if self.compare_to_naive:
|
|
437
470
|
naive_probs_list = _naive_probability_predictions_for_df(
|
|
438
471
|
df,
|
|
439
472
|
self.target,
|
|
440
|
-
list(self.
|
|
473
|
+
list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
|
|
441
474
|
self.naive_granularity,
|
|
442
475
|
)
|
|
443
476
|
naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
|
|
@@ -1358,4 +1391,6 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1358
1391
|
df, self.outcome_column, labels, self.naive_granularity
|
|
1359
1392
|
)
|
|
1360
1393
|
naive_score = self._score_with_probabilities(df, naive_list)
|
|
1394
|
+
if isinstance(score, dict) and isinstance(naive_score, dict):
|
|
1395
|
+
return {k: naive_score[k] - score[k] for k in score.keys()}
|
|
1361
1396
|
return float(naive_score - score)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.2
|
|
3
|
+
Version: 0.8.8
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
|
|
|
17
17
|
License-File: LICENSE
|
|
18
18
|
Requires-Dist: numpy>=1.23.4
|
|
19
19
|
Requires-Dist: optuna>=3.4.0
|
|
20
|
-
Requires-Dist: pandas
|
|
20
|
+
Requires-Dist: pandas<3.0.0,>=2.0.0
|
|
21
21
|
Requires-Dist: pendulum>=1.0.0
|
|
22
22
|
Requires-Dist: scikit-learn>=1.4.0
|
|
23
23
|
Requires-Dist: lightgbm>=4.0.0
|
|
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
|
|
|
85
85
|
import pandas as pd
|
|
86
86
|
from sklearn.linear_model import LogisticRegression
|
|
87
87
|
|
|
88
|
+
from examples import get_sub_sample_nba_data
|
|
88
89
|
from spforge.autopipeline import AutoPipeline
|
|
89
90
|
from spforge.data_structures import ColumnNames
|
|
90
|
-
from spforge.ratings import RatingKnownFeatures
|
|
91
|
-
from spforge.ratings._player_rating import PlayerRatingGenerator
|
|
91
|
+
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
92
92
|
|
|
93
|
-
df =
|
|
93
|
+
df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
|
|
94
94
|
|
|
95
95
|
# Step 1: Define column mappings for your dataset
|
|
96
96
|
column_names = ColumnNames(
|
|
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
|
|
|
144
144
|
pipeline = AutoPipeline(
|
|
145
145
|
estimator=LogisticRegression(),
|
|
146
146
|
granularity=["game_id", "team_id"], # Aggregate players → teams
|
|
147
|
-
|
|
147
|
+
estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
# Train on historical data
|
|
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
|
|
|
302
302
|
prediction_column_name="points_pred",
|
|
303
303
|
target_column="points",
|
|
304
304
|
n_splits=3, # Number of temporal folds
|
|
305
|
-
# Must include both
|
|
306
|
-
features=pipeline.
|
|
305
|
+
# Must include both estimator features and context features
|
|
306
|
+
features=pipeline.required_features,
|
|
307
307
|
)
|
|
308
308
|
|
|
309
309
|
# Generate validation predictions
|
|
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
|
|
|
330
330
|
- `is_validation=1` marks validation rows, `is_validation=0` marks training rows
|
|
331
331
|
- Use `validation_column` in scorer to score only validation rows
|
|
332
332
|
- Training data always comes BEFORE validation data chronologically
|
|
333
|
-
- Must pass
|
|
333
|
+
- Must pass all required features (use `pipeline.required_features`)
|
|
334
334
|
- Scorers can filter rows (e.g., only score players who played minutes > 0)
|
|
335
335
|
|
|
336
336
|
See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
|
|
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
|
|
|
371
371
|
# Approach 1: LGBMClassifier (direct probability prediction)
|
|
372
372
|
pipeline_classifier = AutoPipeline(
|
|
373
373
|
estimator=LGBMClassifier(verbose=-100, random_state=42),
|
|
374
|
-
|
|
374
|
+
estimator_features=features_pipeline.features_out,
|
|
375
375
|
)
|
|
376
376
|
|
|
377
377
|
# Approach 2: LGBMRegressor + NegativeBinomialEstimator
|
|
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(
|
|
|
385
385
|
|
|
386
386
|
pipeline_negbin = AutoPipeline(
|
|
387
387
|
estimator=distribution_estimator,
|
|
388
|
-
|
|
389
|
-
context_feature_names=[
|
|
390
|
-
column_names.player_id,
|
|
391
|
-
column_names.start_date,
|
|
392
|
-
column_names.team_id,
|
|
393
|
-
column_names.match_id,
|
|
394
|
-
],
|
|
388
|
+
estimator_features=features_pipeline.features_out,
|
|
395
389
|
predictor_transformers=[
|
|
396
390
|
EstimatorTransformer(
|
|
397
391
|
prediction_column_name="points_estimate",
|
|
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
|
|
|
439
433
|
# Stage 2: Refine estimate using Stage 1 output
|
|
440
434
|
player_points_pipeline = AutoPipeline(
|
|
441
435
|
estimator=LGBMRegressor(verbose=-100, n_estimators=50),
|
|
442
|
-
|
|
436
|
+
estimator_features=features_pipeline.features_out, # Original features
|
|
443
437
|
# predictor_transformers execute first, adding their predictions
|
|
444
438
|
predictor_transformers=[points_estimate_transformer],
|
|
445
439
|
)
|
|
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
|
|
|
474
468
|
- **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
|
|
475
469
|
- **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
|
|
476
470
|
- **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
|
|
477
|
-
|
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
examples/__init__.py,sha256=qGLpphvrjQj0-zS9vP0Q07L-anDnmw7gFZJUEBgYG3U,158
|
|
2
2
|
examples/game_level_example.py,sha256=EOr-H0K79O3Zah4wWuqa5DLmT2iZGbfgxD-xSU2-dfI,2244
|
|
3
3
|
examples/lol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
examples/lol/pipeline_transformer_example.py,sha256=
|
|
4
|
+
examples/lol/pipeline_transformer_example.py,sha256=XVmm6Xya5z7JyOA0s-DISOlR2I1wpUthCyhRSt9n6qE,3402
|
|
5
5
|
examples/lol/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
examples/lol/data/subsample_lol_data.parquet,sha256=tl04XDslylECJUV1e0DGeqMb6D0Uh6_48NO6TykdgQI,343549
|
|
7
7
|
examples/lol/data/utils.py,sha256=Lt3XNNa5cavvFXHaTQ-GOPxSuWmPEfEO0CVXQEyF_s0,486
|
|
8
8
|
examples/nba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
examples/nba/cross_validation_example.py,sha256=
|
|
10
|
-
examples/nba/feature_engineering_example.py,sha256=
|
|
11
|
-
examples/nba/game_winner_example.py,sha256=
|
|
12
|
-
examples/nba/predictor_transformers_example.py,sha256=
|
|
9
|
+
examples/nba/cross_validation_example.py,sha256=XVnQJ5mqMou9z83ML5J0wS3gk-pa56sdvahJYQgZ8os,5056
|
|
10
|
+
examples/nba/feature_engineering_example.py,sha256=BDd5594Yi_56lGDqz3SYQkwT8NVZyFkgv3gKPCsAjz4,8197
|
|
11
|
+
examples/nba/game_winner_example.py,sha256=7VVHxGyU2uPjT9q6lDMHJ5KpkWp9gU8brxr_UZfuSHg,3189
|
|
12
|
+
examples/nba/predictor_transformers_example.py,sha256=Fl4BY_hVW0iYERolN6s-ZB2xv-UxOK547L6iI5t0r0Y,8807
|
|
13
13
|
examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
|
|
15
15
|
examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
|
|
16
|
-
spforge/__init__.py,sha256=
|
|
16
|
+
spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
|
|
17
17
|
spforge/autopipeline.py,sha256=ZUwv6Q6O8cD0u5TiSqG6lhW0j16RlSb160AzuOeL2R8,23186
|
|
18
18
|
spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
|
|
19
19
|
spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
|
|
20
|
-
spforge/features_generator_pipeline.py,sha256=
|
|
20
|
+
spforge/features_generator_pipeline.py,sha256=n8vzZKqXNFcFRDWZhllnkhAh5NFXdOD3FEIOpHcay8E,8208
|
|
21
21
|
spforge/utils.py,sha256=2RlivUtMX5wQWpFVUyFfexDJE0wV6uZ4dnNzvoDmVhI,2644
|
|
22
22
|
spforge/cross_validator/__init__.py,sha256=1QHgTFIZ73EZ_MgJlUKimxdUmB7MFaOEy6jsUs6V0T0,134
|
|
23
23
|
spforge/cross_validator/_base.py,sha256=-zxZ2Q2tYlGIwjQQMf9_OglS_doppp47gVElkJuBY7E,1199
|
|
@@ -43,25 +43,26 @@ spforge/feature_generator/_rolling_mean_binary.py,sha256=lmODy-o9Dd9pb8IlA7g4UyA
|
|
|
43
43
|
spforge/feature_generator/_rolling_mean_days.py,sha256=EZQmFmYVQB-JjZV5k8bOWnaTxNpPDCZAjdfdhiiG4r4,8415
|
|
44
44
|
spforge/feature_generator/_rolling_window.py,sha256=HT8LezsRIPNAlMEoP9oTPW2bKFu55ZSRnQZGST7fncw,8836
|
|
45
45
|
spforge/feature_generator/_utils.py,sha256=KDn33ia1OYJTK8THFpvc_uRiH_Bl3fImGqqbfzs0YA4,9654
|
|
46
|
-
spforge/hyperparameter_tuning/__init__.py,sha256=
|
|
47
|
-
spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=
|
|
48
|
-
spforge/hyperparameter_tuning/_tuner.py,sha256=
|
|
46
|
+
spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8031Tu_rxlxyY,1015
|
|
47
|
+
spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
|
|
48
|
+
spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
|
|
49
49
|
spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
|
|
50
50
|
spforge/performance_transformers/_performance_manager.py,sha256=KwAga6dGhNkXi-MDW6LPjwk6VZwCcjo5L--jnk9aio8,9706
|
|
51
51
|
spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
|
|
52
|
-
spforge/ratings/__init__.py,sha256=
|
|
52
|
+
spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
|
|
53
53
|
spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
|
|
54
|
-
spforge/ratings/_player_rating.py,sha256=
|
|
54
|
+
spforge/ratings/_player_rating.py,sha256=MyqsyLSY6d7_bxDSnF8eWOyXpSCADWGdepdFSGM4cHw,51365
|
|
55
55
|
spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
|
|
56
56
|
spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
|
|
57
57
|
spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
|
|
58
|
+
spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
|
|
58
59
|
spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
|
|
59
60
|
spforge/ratings/start_rating_generator.py,sha256=_7hIJ9KRVCwsCoY1GIzY8cuOdHR8RH_BCMeMwQG3E04,6776
|
|
60
61
|
spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
|
|
61
62
|
spforge/ratings/team_start_rating_generator.py,sha256=ZJe84sTvE4Yep3d4wKJMMJn2Q4PhcCwkO7Wyd5nsYUA,5110
|
|
62
63
|
spforge/ratings/utils.py,sha256=qms5J5SD-FyXDR2G8giDMbu_AoLgI135pjW4nghxROg,3940
|
|
63
64
|
spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
|
|
64
|
-
spforge/scorer/_score.py,sha256=
|
|
65
|
+
spforge/scorer/_score.py,sha256=TR0T9nJj0aeVgGfOE0fZmXlO66CELulYwxhi7ZAxhvY,56184
|
|
65
66
|
spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
|
|
66
67
|
spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
|
|
67
68
|
spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
|
|
@@ -70,15 +71,17 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
|
|
|
70
71
|
spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
|
|
71
72
|
spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
|
|
72
73
|
spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
|
|
73
|
-
spforge-0.8.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
|
+
spforge-0.8.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
74
75
|
tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
|
|
75
76
|
tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
|
|
76
|
-
tests/test_feature_generator_pipeline.py,sha256=
|
|
77
|
+
tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
|
|
77
78
|
tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
|
|
78
79
|
tests/distributions/test_distribution.py,sha256=aU8hfCgliM80TES4WGjs9KFXpV8XghBGF7Hu9sqEVSE,10982
|
|
80
|
+
tests/end_to_end/test_estimator_hyperparameter_tuning.py,sha256=fZCJ9rrED2vT68B9ovmVA1cIG2pHRTjy9xzZLxxpEBo,2513
|
|
81
|
+
tests/end_to_end/test_league_start_rating_optimizer.py,sha256=Mmct2ixp4c6L7PGym8wZc7E-Csozryt1g4_o6OCc1uI,3141
|
|
79
82
|
tests/end_to_end/test_lol_player_kills.py,sha256=RJSYUbPrZ-RzSxGggj03yN0JKYeTB1JghVGYFMYia3Y,11891
|
|
80
83
|
tests/end_to_end/test_nba_player_points.py,sha256=kyzjo7QIcvpteps29Wix6IS_eJG9d1gHLeWtIHpkWMs,9066
|
|
81
|
-
tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=
|
|
84
|
+
tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=LXRkI_6Ho2kzJVbNAM17QFhx_MP9WdDJXCO9dWgJGNA,6491
|
|
82
85
|
tests/end_to_end/test_nba_prediction_consistency.py,sha256=o3DckJasx_I1ed6MhMYZUo2WSDvQ_p3HtJa9DCWTIYU,9857
|
|
83
86
|
tests/estimator/test_sklearn_estimator.py,sha256=tVfOP9Wx-tV1b6DcHbGxQHZQzNPA0Iobq8jTcUrk59U,48668
|
|
84
87
|
tests/feature_generator/test_lag.py,sha256=5Ffrv0V9cwkbkzRMPBe3_c_YNW-W2al-XH_acQIvdeg,19531
|
|
@@ -87,13 +90,14 @@ tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7AS
|
|
|
87
90
|
tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
|
|
88
91
|
tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
|
|
89
92
|
tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
|
|
93
|
+
tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
|
|
90
94
|
tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
|
|
91
95
|
tests/performance_transformers/test_performance_manager.py,sha256=bfC5GiBuzHw-mLmKeEzBUUPuKm0ayax2bsF1j88W8L0,10120
|
|
92
96
|
tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
|
|
93
|
-
tests/ratings/test_player_rating_generator.py,sha256=
|
|
97
|
+
tests/ratings/test_player_rating_generator.py,sha256=FGH3Tq0uFoSlkS_XMldsUKhsovBRBvzH9EbqjKvg2O0,59601
|
|
94
98
|
tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
|
|
95
99
|
tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
|
|
96
|
-
tests/scorer/test_score.py,sha256=
|
|
100
|
+
tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
|
|
97
101
|
tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
|
|
98
102
|
tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
|
|
99
103
|
tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
|
|
@@ -101,7 +105,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
101
105
|
tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
|
|
102
106
|
tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
|
|
103
107
|
tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
|
|
104
|
-
spforge-0.8.
|
|
105
|
-
spforge-0.8.
|
|
106
|
-
spforge-0.8.
|
|
107
|
-
spforge-0.8.
|
|
108
|
+
spforge-0.8.8.dist-info/METADATA,sha256=fO2JHqnnqOrjkWZ1Zh4rgYg58bi4YzxhSa8I72wqDs4,20047
|
|
109
|
+
spforge-0.8.8.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
110
|
+
spforge-0.8.8.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
|
|
111
|
+
spforge-0.8.8.dist-info/RECORD,,
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
from sklearn.linear_model import LogisticRegression
|
|
3
|
+
from sklearn.metrics import mean_absolute_error
|
|
4
|
+
|
|
5
|
+
from examples import get_sub_sample_nba_data
|
|
6
|
+
from spforge import AutoPipeline, ColumnNames, EstimatorHyperparameterTuner, ParamSpec
|
|
7
|
+
from spforge.cross_validator import MatchKFoldCrossValidator
|
|
8
|
+
from spforge.scorer import SklearnScorer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_nba_estimator_hyperparameter_tuning__workflow_completes():
|
|
12
|
+
df = get_sub_sample_nba_data(as_polars=True, as_pandas=False)
|
|
13
|
+
column_names = ColumnNames(
|
|
14
|
+
team_id="team_id",
|
|
15
|
+
match_id="game_id",
|
|
16
|
+
start_date="start_date",
|
|
17
|
+
player_id="player_id",
|
|
18
|
+
participation_weight="minutes_ratio",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
df = df.sort(
|
|
22
|
+
[
|
|
23
|
+
column_names.start_date,
|
|
24
|
+
column_names.match_id,
|
|
25
|
+
column_names.team_id,
|
|
26
|
+
column_names.player_id,
|
|
27
|
+
]
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
df = df.with_columns(
|
|
31
|
+
[
|
|
32
|
+
(pl.col("minutes") / pl.col("minutes").sum().over("game_id")).alias(
|
|
33
|
+
"minutes_ratio"
|
|
34
|
+
),
|
|
35
|
+
(pl.col("points") > pl.lit(10)).cast(pl.Int64).alias("points_over_10"),
|
|
36
|
+
]
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
estimator = AutoPipeline(
|
|
40
|
+
estimator=LogisticRegression(max_iter=200),
|
|
41
|
+
estimator_features=["minutes", "minutes_ratio"],
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
cv = MatchKFoldCrossValidator(
|
|
45
|
+
match_id_column_name=column_names.match_id,
|
|
46
|
+
date_column_name=column_names.start_date,
|
|
47
|
+
target_column="points_over_10",
|
|
48
|
+
estimator=estimator,
|
|
49
|
+
prediction_column_name="points_pred",
|
|
50
|
+
n_splits=2,
|
|
51
|
+
features=estimator.required_features,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
scorer = SklearnScorer(
|
|
55
|
+
scorer_function=mean_absolute_error,
|
|
56
|
+
pred_column="points_pred",
|
|
57
|
+
target="points_over_10",
|
|
58
|
+
validation_column="is_validation",
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
tuner = EstimatorHyperparameterTuner(
|
|
62
|
+
estimator=estimator,
|
|
63
|
+
cross_validator=cv,
|
|
64
|
+
scorer=scorer,
|
|
65
|
+
direction="minimize",
|
|
66
|
+
param_search_space={
|
|
67
|
+
"C": ParamSpec(
|
|
68
|
+
param_type="float",
|
|
69
|
+
low=0.1,
|
|
70
|
+
high=2.0,
|
|
71
|
+
log=True,
|
|
72
|
+
),
|
|
73
|
+
},
|
|
74
|
+
n_trials=3,
|
|
75
|
+
show_progress_bar=False,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
result = tuner.optimize(df)
|
|
79
|
+
|
|
80
|
+
assert result.best_params is not None
|
|
81
|
+
assert isinstance(result.best_params, dict)
|
|
82
|
+
assert "estimator__C" in result.best_params
|
|
83
|
+
assert isinstance(result.best_value, float)
|
|
84
|
+
assert result.best_trial is not None
|
|
85
|
+
assert result.study is not None
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import polars as pl
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from spforge import ColumnNames
|
|
6
|
+
from spforge.ratings import (
|
|
7
|
+
LeagueStartRatingOptimizer,
|
|
8
|
+
PlayerRatingGenerator,
|
|
9
|
+
TeamRatingGenerator,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _player_df():
|
|
14
|
+
dates = pd.date_range("2024-01-01", periods=3, freq="D")
|
|
15
|
+
rows = []
|
|
16
|
+
for i, date in enumerate(dates):
|
|
17
|
+
mid = f"M{i}"
|
|
18
|
+
for player_idx in range(2):
|
|
19
|
+
rows.append(
|
|
20
|
+
{
|
|
21
|
+
"pid": f"A{player_idx}",
|
|
22
|
+
"tid": "TA",
|
|
23
|
+
"mid": mid,
|
|
24
|
+
"date": date,
|
|
25
|
+
"league": "LCK",
|
|
26
|
+
"perf": 0.4,
|
|
27
|
+
}
|
|
28
|
+
)
|
|
29
|
+
for player_idx in range(2):
|
|
30
|
+
rows.append(
|
|
31
|
+
{
|
|
32
|
+
"pid": f"B{player_idx}",
|
|
33
|
+
"tid": "TB",
|
|
34
|
+
"mid": mid,
|
|
35
|
+
"date": date,
|
|
36
|
+
"league": "LEC",
|
|
37
|
+
"perf": 0.6,
|
|
38
|
+
}
|
|
39
|
+
)
|
|
40
|
+
return pd.DataFrame(rows)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _team_df():
|
|
44
|
+
dates = pd.date_range("2024-01-01", periods=3, freq="D")
|
|
45
|
+
rows = []
|
|
46
|
+
for i, date in enumerate(dates):
|
|
47
|
+
mid = f"M{i}"
|
|
48
|
+
rows.extend(
|
|
49
|
+
[
|
|
50
|
+
{
|
|
51
|
+
"tid": "TA",
|
|
52
|
+
"mid": mid,
|
|
53
|
+
"date": date,
|
|
54
|
+
"league": "LCK",
|
|
55
|
+
"perf": 0.4,
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"tid": "TB",
|
|
59
|
+
"mid": mid,
|
|
60
|
+
"date": date,
|
|
61
|
+
"league": "LEC",
|
|
62
|
+
"perf": 0.6,
|
|
63
|
+
},
|
|
64
|
+
]
|
|
65
|
+
)
|
|
66
|
+
return pd.DataFrame(rows)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pytest.mark.parametrize("use_polars", [False, True])
|
|
70
|
+
def test_league_start_rating_optimizer__adjusts_player_leagues(use_polars):
|
|
71
|
+
cn = ColumnNames(
|
|
72
|
+
player_id="pid",
|
|
73
|
+
team_id="tid",
|
|
74
|
+
match_id="mid",
|
|
75
|
+
start_date="date",
|
|
76
|
+
league="league",
|
|
77
|
+
)
|
|
78
|
+
df = _player_df()
|
|
79
|
+
if use_polars:
|
|
80
|
+
df = pl.from_pandas(df)
|
|
81
|
+
generator = PlayerRatingGenerator(performance_column="perf", column_names=cn)
|
|
82
|
+
optimizer = LeagueStartRatingOptimizer(
|
|
83
|
+
rating_generator=generator,
|
|
84
|
+
n_iterations=1,
|
|
85
|
+
learning_rate=0.5,
|
|
86
|
+
min_cross_region_rows=1,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
result = optimizer.optimize(df)
|
|
90
|
+
|
|
91
|
+
assert result.league_ratings["LCK"] < 1000
|
|
92
|
+
assert result.league_ratings["LEC"] > 1000
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@pytest.mark.parametrize("use_polars", [False, True])
|
|
96
|
+
def test_league_start_rating_optimizer__adjusts_team_leagues(use_polars):
|
|
97
|
+
cn = ColumnNames(
|
|
98
|
+
team_id="tid",
|
|
99
|
+
match_id="mid",
|
|
100
|
+
start_date="date",
|
|
101
|
+
league="league",
|
|
102
|
+
)
|
|
103
|
+
df = _team_df()
|
|
104
|
+
if use_polars:
|
|
105
|
+
df = pl.from_pandas(df)
|
|
106
|
+
generator = TeamRatingGenerator(performance_column="perf", column_names=cn)
|
|
107
|
+
optimizer = LeagueStartRatingOptimizer(
|
|
108
|
+
rating_generator=generator,
|
|
109
|
+
n_iterations=1,
|
|
110
|
+
learning_rate=0.5,
|
|
111
|
+
min_cross_region_rows=1,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
result = optimizer.optimize(df)
|
|
115
|
+
|
|
116
|
+
assert result.league_ratings["LCK"] < 1000
|
|
117
|
+
assert result.league_ratings["LEC"] > 1000
|
|
@@ -97,6 +97,11 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
|
|
|
97
97
|
"confidence_max_sum",
|
|
98
98
|
"use_off_def_split",
|
|
99
99
|
"performance_predictor",
|
|
100
|
+
"start_team_weight",
|
|
101
|
+
"start_league_quantile",
|
|
102
|
+
"start_min_count_for_percentiles",
|
|
103
|
+
"start_min_match_count_team_rating",
|
|
104
|
+
"start_team_rating_subtract",
|
|
100
105
|
}
|
|
101
106
|
assert set(result.best_params.keys()) == expected_params
|
|
102
107
|
|