spforge 0.8.4__py3-none-any.whl → 0.8.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +11 -0
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- {spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/METADATA +12 -19
- {spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/RECORD +23 -19
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/ratings/test_player_rating_generator.py +27 -0
- tests/scorer/test_score.py +90 -0
- {spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/WHEEL +0 -0
- {spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/top_level.txt +0 -0
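The headline additions in 0.8.7 are the `spforge.hyperparameter_tuning` module (`EstimatorHyperparameterTuner`, `ParamSpec`, and default search spaces) and the new `spforge/ratings/league_start_rating_optimizer.py`. As a quick orientation before the raw diff below, here is a minimal sketch of the tuning loop, condensed from the new `tests/hyperparameter_tuning/test_estimator_tuner.py` shown later in this diff; the toy data frame and its column names are illustrative stand-ins, not part of the package.

```python
import pandas as pd
from sklearn.linear_model import LogisticRegression

from spforge import EstimatorHyperparameterTuner, ParamSpec
from spforge.cross_validator import MatchKFoldCrossValidator
from spforge.estimator import SkLearnEnhancerEstimator
from spforge.scorer import MeanBiasScorer

# Tiny synthetic frame in the shape used by the new unit tests:
# two rows per match id, one numeric feature, a binary target.
df = pd.DataFrame(
    {
        "mid": [f"M{i // 2}" for i in range(12)],
        "date": pd.date_range("2024-01-01", periods=12, freq="D"),
        "x1": [float(i) for i in range(12)],
        "y": [i % 2 for i in range(12)],
    }
)

estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
cv = MatchKFoldCrossValidator(
    match_id_column_name="mid",
    date_column_name="date",
    target_column="y",
    estimator=estimator,
    prediction_column_name="y_pred",
    n_splits=2,
    features=["x1"],
)
scorer = MeanBiasScorer(
    pred_column="y_pred", target="y", validation_column="is_validation"
)
tuner = EstimatorHyperparameterTuner(
    estimator=estimator,
    cross_validator=cv,
    scorer=scorer,
    direction="minimize",
    param_search_space={"C": ParamSpec(param_type="float", low=0.1, high=2.0, log=True)},
    n_trials=2,
    show_progress_bar=False,
)
result = tuner.optimize(df)
print(result.best_params)  # e.g. contains an "estimator__C" entry
```

The `LeagueStartRatingOptimizer` workflow is covered by `tests/end_to_end/test_league_start_rating_optimizer.py` further down in this diff.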
{spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.
+Version: 0.8.7
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=1.23.4
 Requires-Dist: optuna>=3.4.0
-Requires-Dist: pandas
+Requires-Dist: pandas<3.0.0,>=2.0.0
 Requires-Dist: pendulum>=1.0.0
 Requires-Dist: scikit-learn>=1.4.0
 Requires-Dist: lightgbm>=4.0.0
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
 import pandas as pd
 from sklearn.linear_model import LogisticRegression

+from examples import get_sub_sample_nba_data
 from spforge.autopipeline import AutoPipeline
 from spforge.data_structures import ColumnNames
-from spforge.ratings import RatingKnownFeatures
-from spforge.ratings._player_rating import PlayerRatingGenerator
+from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures

-df =
+df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)

 # Step 1: Define column mappings for your dataset
 column_names = ColumnNames(
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
 pipeline = AutoPipeline(
     estimator=LogisticRegression(),
     granularity=["game_id", "team_id"], # Aggregate players → teams
-
+    estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
 )

 # Train on historical data
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
     prediction_column_name="points_pred",
     target_column="points",
     n_splits=3, # Number of temporal folds
-    # Must include both
-    features=pipeline.
+    # Must include both estimator features and context features
+    features=pipeline.required_features,
 )

 # Generate validation predictions
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
 - `is_validation=1` marks validation rows, `is_validation=0` marks training rows
 - Use `validation_column` in scorer to score only validation rows
 - Training data always comes BEFORE validation data chronologically
-- Must pass
+- Must pass all required features (use `pipeline.required_features`)
 - Scorers can filter rows (e.g., only score players who played minutes > 0)

 See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
 # Approach 1: LGBMClassifier (direct probability prediction)
 pipeline_classifier = AutoPipeline(
     estimator=LGBMClassifier(verbose=-100, random_state=42),
-
+    estimator_features=features_pipeline.features_out,
 )

 # Approach 2: LGBMRegressor + NegativeBinomialEstimator
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(

 pipeline_negbin = AutoPipeline(
     estimator=distribution_estimator,
-
-    context_feature_names=[
-        column_names.player_id,
-        column_names.start_date,
-        column_names.team_id,
-        column_names.match_id,
-    ],
+    estimator_features=features_pipeline.features_out,
     predictor_transformers=[
         EstimatorTransformer(
             prediction_column_name="points_estimate",
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
 # Stage 2: Refine estimate using Stage 1 output
 player_points_pipeline = AutoPipeline(
     estimator=LGBMRegressor(verbose=-100, n_estimators=50),
-
+    estimator_features=features_pipeline.features_out, # Original features
     # predictor_transformers execute first, adding their predictions
     predictor_transformers=[points_estimate_transformer],
 )
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
 - **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
 - **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
 - **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
-
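Taken together, the README edits in this METADATA diff point at one API change: `AutoPipeline` now receives its model inputs via `estimator_features` (the removed `context_feature_names` block is gone), and downstream components should be handed `pipeline.required_features`. A minimal sketch under those assumptions; the literal feature names below are hypothetical stand-ins for the `rating_generator.features_out` / `features_pipeline.features_out` values used in the snippets above.

```python
from sklearn.linear_model import LogisticRegression

from spforge.autopipeline import AutoPipeline

# Hypothetical literal feature list; the README snippets above pass
# `rating_generator.features_out + ["location"]` or `features_pipeline.features_out`.
pipeline = AutoPipeline(
    estimator=LogisticRegression(),
    estimator_features=["player_off_rating", "location"],
)

# Anything that re-fits the pipeline (e.g. MatchKFoldCrossValidator) should be
# given `pipeline.required_features`, which covers the estimator features plus
# the context columns the pipeline itself needs.
print(pipeline.required_features)
```

In the new tests, `required_features` is read straight off the constructed pipeline before any training, so it can be passed to the cross validator's `features=` argument at construction time.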
{spforge-0.8.4.dist-info → spforge-0.8.7.dist-info}/RECORD

@@ -1,19 +1,19 @@
 examples/__init__.py,sha256=qGLpphvrjQj0-zS9vP0Q07L-anDnmw7gFZJUEBgYG3U,158
 examples/game_level_example.py,sha256=EOr-H0K79O3Zah4wWuqa5DLmT2iZGbfgxD-xSU2-dfI,2244
 examples/lol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-examples/lol/pipeline_transformer_example.py,sha256=
+examples/lol/pipeline_transformer_example.py,sha256=XVmm6Xya5z7JyOA0s-DISOlR2I1wpUthCyhRSt9n6qE,3402
 examples/lol/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 examples/lol/data/subsample_lol_data.parquet,sha256=tl04XDslylECJUV1e0DGeqMb6D0Uh6_48NO6TykdgQI,343549
 examples/lol/data/utils.py,sha256=Lt3XNNa5cavvFXHaTQ-GOPxSuWmPEfEO0CVXQEyF_s0,486
 examples/nba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-examples/nba/cross_validation_example.py,sha256=
-examples/nba/feature_engineering_example.py,sha256=
-examples/nba/game_winner_example.py,sha256=
-examples/nba/predictor_transformers_example.py,sha256=
+examples/nba/cross_validation_example.py,sha256=XVnQJ5mqMou9z83ML5J0wS3gk-pa56sdvahJYQgZ8os,5056
+examples/nba/feature_engineering_example.py,sha256=BDd5594Yi_56lGDqz3SYQkwT8NVZyFkgv3gKPCsAjz4,8197
+examples/nba/game_winner_example.py,sha256=7VVHxGyU2uPjT9q6lDMHJ5KpkWp9gU8brxr_UZfuSHg,3189
+examples/nba/predictor_transformers_example.py,sha256=Fl4BY_hVW0iYERolN6s-ZB2xv-UxOK547L6iI5t0r0Y,8807
 examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
 examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
-spforge/__init__.py,sha256=
+spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
 spforge/autopipeline.py,sha256=ZUwv6Q6O8cD0u5TiSqG6lhW0j16RlSb160AzuOeL2R8,23186
 spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
 spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
@@ -43,18 +43,19 @@ spforge/feature_generator/_rolling_mean_binary.py,sha256=lmODy-o9Dd9pb8IlA7g4UyA
 spforge/feature_generator/_rolling_mean_days.py,sha256=EZQmFmYVQB-JjZV5k8bOWnaTxNpPDCZAjdfdhiiG4r4,8415
 spforge/feature_generator/_rolling_window.py,sha256=HT8LezsRIPNAlMEoP9oTPW2bKFu55ZSRnQZGST7fncw,8836
 spforge/feature_generator/_utils.py,sha256=KDn33ia1OYJTK8THFpvc_uRiH_Bl3fImGqqbfzs0YA4,9654
-spforge/hyperparameter_tuning/__init__.py,sha256=
-spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=
-spforge/hyperparameter_tuning/_tuner.py,sha256=
+spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8031Tu_rxlxyY,1015
+spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
+spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
 spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
 spforge/performance_transformers/_performance_manager.py,sha256=KwAga6dGhNkXi-MDW6LPjwk6VZwCcjo5L--jnk9aio8,9706
 spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
-spforge/ratings/__init__.py,sha256=
+spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
 spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
-spforge/ratings/_player_rating.py,sha256=
+spforge/ratings/_player_rating.py,sha256=MyqsyLSY6d7_bxDSnF8eWOyXpSCADWGdepdFSGM4cHw,51365
 spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
 spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
 spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
+spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
 spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
 spforge/ratings/start_rating_generator.py,sha256=_7hIJ9KRVCwsCoY1GIzY8cuOdHR8RH_BCMeMwQG3E04,6776
 spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
@@ -70,15 +71,17 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.
+spforge-0.8.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CAgBknWqawqYi5_hxcPmpxrLVa5elMHVv1VrSVRKXEA,17705
 tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
 tests/distributions/test_distribution.py,sha256=aU8hfCgliM80TES4WGjs9KFXpV8XghBGF7Hu9sqEVSE,10982
+tests/end_to_end/test_estimator_hyperparameter_tuning.py,sha256=fZCJ9rrED2vT68B9ovmVA1cIG2pHRTjy9xzZLxxpEBo,2513
+tests/end_to_end/test_league_start_rating_optimizer.py,sha256=Mmct2ixp4c6L7PGym8wZc7E-Csozryt1g4_o6OCc1uI,3141
 tests/end_to_end/test_lol_player_kills.py,sha256=RJSYUbPrZ-RzSxGggj03yN0JKYeTB1JghVGYFMYia3Y,11891
 tests/end_to_end/test_nba_player_points.py,sha256=kyzjo7QIcvpteps29Wix6IS_eJG9d1gHLeWtIHpkWMs,9066
-tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=
+tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=LXRkI_6Ho2kzJVbNAM17QFhx_MP9WdDJXCO9dWgJGNA,6491
 tests/end_to_end/test_nba_prediction_consistency.py,sha256=o3DckJasx_I1ed6MhMYZUo2WSDvQ_p3HtJa9DCWTIYU,9857
 tests/estimator/test_sklearn_estimator.py,sha256=tVfOP9Wx-tV1b6DcHbGxQHZQzNPA0Iobq8jTcUrk59U,48668
 tests/feature_generator/test_lag.py,sha256=5Ffrv0V9cwkbkzRMPBe3_c_YNW-W2al-XH_acQIvdeg,19531
@@ -87,13 +90,14 @@ tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7AS
 tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
 tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
 tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
+tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
 tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
 tests/performance_transformers/test_performance_manager.py,sha256=bfC5GiBuzHw-mLmKeEzBUUPuKm0ayax2bsF1j88W8L0,10120
 tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
-tests/ratings/test_player_rating_generator.py,sha256=
+tests/ratings/test_player_rating_generator.py,sha256=FGH3Tq0uFoSlkS_XMldsUKhsovBRBvzH9EbqjKvg2O0,59601
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
-tests/scorer/test_score.py,sha256=
+tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
 tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -101,7 +105,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
-spforge-0.8.
+spforge-0.8.7.dist-info/METADATA,sha256=7vwprmmFvSpEL3lC0HqFZPbzxMi8mRzI0yOsa7pUlNQ,20047
+spforge-0.8.7.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+spforge-0.8.7.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.7.dist-info/RECORD,,
tests/end_to_end/test_estimator_hyperparameter_tuning.py

@@ -0,0 +1,85 @@
+import polars as pl
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import mean_absolute_error
+
+from examples import get_sub_sample_nba_data
+from spforge import AutoPipeline, ColumnNames, EstimatorHyperparameterTuner, ParamSpec
+from spforge.cross_validator import MatchKFoldCrossValidator
+from spforge.scorer import SklearnScorer
+
+
+def test_nba_estimator_hyperparameter_tuning__workflow_completes():
+    df = get_sub_sample_nba_data(as_polars=True, as_pandas=False)
+    column_names = ColumnNames(
+        team_id="team_id",
+        match_id="game_id",
+        start_date="start_date",
+        player_id="player_id",
+        participation_weight="minutes_ratio",
+    )
+
+    df = df.sort(
+        [
+            column_names.start_date,
+            column_names.match_id,
+            column_names.team_id,
+            column_names.player_id,
+        ]
+    )
+
+    df = df.with_columns(
+        [
+            (pl.col("minutes") / pl.col("minutes").sum().over("game_id")).alias(
+                "minutes_ratio"
+            ),
+            (pl.col("points") > pl.lit(10)).cast(pl.Int64).alias("points_over_10"),
+        ]
+    )
+
+    estimator = AutoPipeline(
+        estimator=LogisticRegression(max_iter=200),
+        estimator_features=["minutes", "minutes_ratio"],
+    )
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name=column_names.match_id,
+        date_column_name=column_names.start_date,
+        target_column="points_over_10",
+        estimator=estimator,
+        prediction_column_name="points_pred",
+        n_splits=2,
+        features=estimator.required_features,
+    )
+
+    scorer = SklearnScorer(
+        scorer_function=mean_absolute_error,
+        pred_column="points_pred",
+        target="points_over_10",
+        validation_column="is_validation",
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        param_search_space={
+            "C": ParamSpec(
+                param_type="float",
+                low=0.1,
+                high=2.0,
+                log=True,
+            ),
+        },
+        n_trials=3,
+        show_progress_bar=False,
+    )
+
+    result = tuner.optimize(df)
+
+    assert result.best_params is not None
+    assert isinstance(result.best_params, dict)
+    assert "estimator__C" in result.best_params
+    assert isinstance(result.best_value, float)
+    assert result.best_trial is not None
+    assert result.study is not None
tests/end_to_end/test_league_start_rating_optimizer.py

@@ -0,0 +1,117 @@
+import pandas as pd
+import polars as pl
+import pytest
+
+from spforge import ColumnNames
+from spforge.ratings import (
+    LeagueStartRatingOptimizer,
+    PlayerRatingGenerator,
+    TeamRatingGenerator,
+)
+
+
+def _player_df():
+    dates = pd.date_range("2024-01-01", periods=3, freq="D")
+    rows = []
+    for i, date in enumerate(dates):
+        mid = f"M{i}"
+        for player_idx in range(2):
+            rows.append(
+                {
+                    "pid": f"A{player_idx}",
+                    "tid": "TA",
+                    "mid": mid,
+                    "date": date,
+                    "league": "LCK",
+                    "perf": 0.4,
+                }
+            )
+        for player_idx in range(2):
+            rows.append(
+                {
+                    "pid": f"B{player_idx}",
+                    "tid": "TB",
+                    "mid": mid,
+                    "date": date,
+                    "league": "LEC",
+                    "perf": 0.6,
+                }
+            )
+    return pd.DataFrame(rows)
+
+
+def _team_df():
+    dates = pd.date_range("2024-01-01", periods=3, freq="D")
+    rows = []
+    for i, date in enumerate(dates):
+        mid = f"M{i}"
+        rows.extend(
+            [
+                {
+                    "tid": "TA",
+                    "mid": mid,
+                    "date": date,
+                    "league": "LCK",
+                    "perf": 0.4,
+                },
+                {
+                    "tid": "TB",
+                    "mid": mid,
+                    "date": date,
+                    "league": "LEC",
+                    "perf": 0.6,
+                },
+            ]
+        )
+    return pd.DataFrame(rows)
+
+
+@pytest.mark.parametrize("use_polars", [False, True])
+def test_league_start_rating_optimizer__adjusts_player_leagues(use_polars):
+    cn = ColumnNames(
+        player_id="pid",
+        team_id="tid",
+        match_id="mid",
+        start_date="date",
+        league="league",
+    )
+    df = _player_df()
+    if use_polars:
+        df = pl.from_pandas(df)
+    generator = PlayerRatingGenerator(performance_column="perf", column_names=cn)
+    optimizer = LeagueStartRatingOptimizer(
+        rating_generator=generator,
+        n_iterations=1,
+        learning_rate=0.5,
+        min_cross_region_rows=1,
+    )
+
+    result = optimizer.optimize(df)
+
+    assert result.league_ratings["LCK"] < 1000
+    assert result.league_ratings["LEC"] > 1000
+
+
+@pytest.mark.parametrize("use_polars", [False, True])
+def test_league_start_rating_optimizer__adjusts_team_leagues(use_polars):
+    cn = ColumnNames(
+        team_id="tid",
+        match_id="mid",
+        start_date="date",
+        league="league",
+    )
+    df = _team_df()
+    if use_polars:
+        df = pl.from_pandas(df)
+    generator = TeamRatingGenerator(performance_column="perf", column_names=cn)
+    optimizer = LeagueStartRatingOptimizer(
+        rating_generator=generator,
+        n_iterations=1,
+        learning_rate=0.5,
+        min_cross_region_rows=1,
+    )
+
+    result = optimizer.optimize(df)
+
+    assert result.league_ratings["LCK"] < 1000
+    assert result.league_ratings["LEC"] > 1000
tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py

@@ -97,6 +97,11 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
         "confidence_max_sum",
         "use_off_def_split",
         "performance_predictor",
+        "start_team_weight",
+        "start_league_quantile",
+        "start_min_count_for_percentiles",
+        "start_min_match_count_team_rating",
+        "start_team_rating_subtract",
     }
     assert set(result.best_params.keys()) == expected_params

tests/hyperparameter_tuning/test_estimator_tuner.py

@@ -0,0 +1,167 @@
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.linear_model import LogisticRegression
+
+from spforge import EstimatorHyperparameterTuner, ParamSpec
+from spforge.cross_validator import MatchKFoldCrossValidator
+from spforge.estimator import SkLearnEnhancerEstimator
+from spforge.scorer import MeanBiasScorer
+
+
+class FakeLGBMClassifier(BaseEstimator):
+    __module__ = "lightgbm.sklearn"
+
+    def __init__(
+        self,
+        n_estimators: int = 100,
+        num_leaves: int = 31,
+        max_depth: int = 5,
+        min_child_samples: int = 20,
+        subsample: float = 1.0,
+        subsample_freq: int = 1,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+    ):
+        self.n_estimators = n_estimators
+        self.num_leaves = num_leaves
+        self.max_depth = max_depth
+        self.min_child_samples = min_child_samples
+        self.subsample = subsample
+        self.subsample_freq = subsample_freq
+        self.reg_alpha = reg_alpha
+        self.reg_lambda = reg_lambda
+
+    def fit(self, X, y):
+        self.classes_ = np.unique(y)
+        return self
+
+    def predict_proba(self, X):
+        n = len(X)
+        if len(self.classes_) < 2:
+            return np.ones((n, 1))
+        return np.tile([0.4, 0.6], (n, 1))
+
+    def predict(self, X):
+        n = len(X)
+        if len(self.classes_) == 1:
+            return np.full(n, self.classes_[0])
+        proba = self.predict_proba(X)
+        idx = np.argmax(proba, axis=1)
+        return np.array(self.classes_)[idx]
+
+
+@pytest.fixture
+def sample_df():
+    dates = pd.date_range("2024-01-01", periods=12, freq="D")
+    rows = []
+    for i, date in enumerate(dates):
+        rows.append(
+            {
+                "mid": f"M{i // 2}",
+                "date": date,
+                "x1": float(i),
+                "y": 1 if i % 2 == 0 else 0,
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+@pytest.fixture
+def scorer():
+    return MeanBiasScorer(
+        pred_column="y_pred",
+        target="y",
+        validation_column="is_validation",
+    )
+
+
+def test_estimator_tuner_requires_search_space(sample_df, scorer):
+    estimator = LogisticRegression()
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    with pytest.raises(ValueError, match="param_search_space is required"):
+        tuner.optimize(sample_df)
+
+
+def test_estimator_tuner_custom_search_space(sample_df, scorer):
+    estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        param_search_space={
+            "C": ParamSpec(
+                param_type="float",
+                low=0.1,
+                high=2.0,
+                log=True,
+            )
+        },
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    result = tuner.optimize(sample_df)
+
+    assert "estimator__C" in result.best_params
+    assert isinstance(result.best_value, float)
+
+
+def test_estimator_tuner_lgbm_defaults(sample_df, scorer):
+    estimator = FakeLGBMClassifier()
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    result = tuner.optimize(sample_df)
+
+    assert "n_estimators" in result.best_params
+    assert isinstance(result.best_value, float)
tests/ratings/test_player_rating_generator.py

@@ -1662,3 +1662,30 @@ def test_player_rating_team_with_strong_offense_and_weak_defense_gets_expected_r

     assert a_off > start_rating
     assert a_def < start_rating
+
+
+def test_fit_transform__player_rating_difference_from_team_projected_feature(base_cn, sample_df):
+    """PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED computes player_off_rating - team_off_rating_projected."""
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        auto_scale_performance=True,
+        features_out=[
+            RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED,
+            RatingKnownFeatures.PLAYER_OFF_RATING,
+            RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED,
+        ],
+    )
+    result = gen.fit_transform(sample_df)
+
+    diff_col = "player_rating_difference_from_team_projected_perf"
+    player_col = "player_off_rating_perf"
+    team_col = "team_off_rating_projected_perf"
+
+    assert diff_col in result.columns
+    assert player_col in result.columns
+    assert team_col in result.columns
+
+    for row in result.iter_rows(named=True):
+        expected = row[player_col] - row[team_col]
+        assert row[diff_col] == pytest.approx(expected, rel=1e-9)