spforge 0.8.4__py3-none-any.whl → 0.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.4
3
+ Version: 0.8.7
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: numpy>=1.23.4
19
19
  Requires-Dist: optuna>=3.4.0
20
- Requires-Dist: pandas>=2.0.0
20
+ Requires-Dist: pandas<3.0.0,>=2.0.0
21
21
  Requires-Dist: pendulum>=1.0.0
22
22
  Requires-Dist: scikit-learn>=1.4.0
23
23
  Requires-Dist: lightgbm>=4.0.0
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
85
85
  import pandas as pd
86
86
  from sklearn.linear_model import LogisticRegression
87
87
 
88
+ from examples import get_sub_sample_nba_data
88
89
  from spforge.autopipeline import AutoPipeline
89
90
  from spforge.data_structures import ColumnNames
90
- from spforge.ratings import RatingKnownFeatures
91
- from spforge.ratings._player_rating import PlayerRatingGenerator
91
+ from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
92
92
 
93
- df = pd.read_parquet("data/game_player_subsample.parquet")
93
+ df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
94
94
 
95
95
  # Step 1: Define column mappings for your dataset
96
96
  column_names = ColumnNames(
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
144
144
  pipeline = AutoPipeline(
145
145
  estimator=LogisticRegression(),
146
146
  granularity=["game_id", "team_id"], # Aggregate players → teams
147
- feature_names=rating_generator.features_out + ["location"], # Rating + home/away
147
+ estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
148
148
  )
149
149
 
150
150
  # Train on historical data
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
302
302
  prediction_column_name="points_pred",
303
303
  target_column="points",
304
304
  n_splits=3, # Number of temporal folds
305
- # Must include both feature_names AND context_feature_names
306
- features=pipeline.feature_names + pipeline.context_feature_names,
305
+ # Must include both estimator features and context features
306
+ features=pipeline.required_features,
307
307
  )
308
308
 
309
309
  # Generate validation predictions
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
330
330
  - `is_validation=1` marks validation rows, `is_validation=0` marks training rows
331
331
  - Use `validation_column` in scorer to score only validation rows
332
332
  - Training data always comes BEFORE validation data chronologically
333
- - Must pass both `feature_names` + `context_feature_names` to `features` parameter
333
+ - Must pass all required features (use `pipeline.required_features`)
334
334
  - Scorers can filter rows (e.g., only score players who played minutes > 0)
335
335
 
336
336
  See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
371
371
  # Approach 1: LGBMClassifier (direct probability prediction)
372
372
  pipeline_classifier = AutoPipeline(
373
373
  estimator=LGBMClassifier(verbose=-100, random_state=42),
374
- feature_names=features_pipeline.features_out,
374
+ estimator_features=features_pipeline.features_out,
375
375
  )
376
376
 
377
377
  # Approach 2: LGBMRegressor + NegativeBinomialEstimator
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(
385
385
 
386
386
  pipeline_negbin = AutoPipeline(
387
387
  estimator=distribution_estimator,
388
- feature_names=features_pipeline.features_out,
389
- context_feature_names=[
390
- column_names.player_id,
391
- column_names.start_date,
392
- column_names.team_id,
393
- column_names.match_id,
394
- ],
388
+ estimator_features=features_pipeline.features_out,
395
389
  predictor_transformers=[
396
390
  EstimatorTransformer(
397
391
  prediction_column_name="points_estimate",
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
439
433
  # Stage 2: Refine estimate using Stage 1 output
440
434
  player_points_pipeline = AutoPipeline(
441
435
  estimator=LGBMRegressor(verbose=-100, n_estimators=50),
442
- feature_names=features_pipeline.features_out, # Original features
436
+ estimator_features=features_pipeline.features_out, # Original features
443
437
  # predictor_transformers execute first, adding their predictions
444
438
  predictor_transformers=[points_estimate_transformer],
445
439
  )
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
474
468
  - **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
475
469
  - **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
476
470
  - **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
477
-
@@ -1,19 +1,19 @@
1
1
  examples/__init__.py,sha256=qGLpphvrjQj0-zS9vP0Q07L-anDnmw7gFZJUEBgYG3U,158
2
2
  examples/game_level_example.py,sha256=EOr-H0K79O3Zah4wWuqa5DLmT2iZGbfgxD-xSU2-dfI,2244
3
3
  examples/lol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- examples/lol/pipeline_transformer_example.py,sha256=HousFjE2dbJgdONur4PxwhW2SGQIJGI8aZUIb4TEvIo,4317
4
+ examples/lol/pipeline_transformer_example.py,sha256=XVmm6Xya5z7JyOA0s-DISOlR2I1wpUthCyhRSt9n6qE,3402
5
5
  examples/lol/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  examples/lol/data/subsample_lol_data.parquet,sha256=tl04XDslylECJUV1e0DGeqMb6D0Uh6_48NO6TykdgQI,343549
7
7
  examples/lol/data/utils.py,sha256=Lt3XNNa5cavvFXHaTQ-GOPxSuWmPEfEO0CVXQEyF_s0,486
8
8
  examples/nba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- examples/nba/cross_validation_example.py,sha256=WD_52vO9m1rILVfXXf8uIb_odpaK-TZ4iOewHU19lTg,5281
10
- examples/nba/feature_engineering_example.py,sha256=0OHJ2w6vkHvFB2bYwIQQb8HjFA5bfXc7tLmngnahG74,7708
11
- examples/nba/game_winner_example.py,sha256=RNKYSwpArr08yDWOtkxjx7eAldf97WYDBBfb3tsVSZc,2975
12
- examples/nba/predictor_transformers_example.py,sha256=mPXRVPx4J5VZtxYH89k7pwh7_EGZ0CXoNHeh2s0AOp8,8499
9
+ examples/nba/cross_validation_example.py,sha256=XVnQJ5mqMou9z83ML5J0wS3gk-pa56sdvahJYQgZ8os,5056
10
+ examples/nba/feature_engineering_example.py,sha256=BDd5594Yi_56lGDqz3SYQkwT8NVZyFkgv3gKPCsAjz4,8197
11
+ examples/nba/game_winner_example.py,sha256=7VVHxGyU2uPjT9q6lDMHJ5KpkWp9gU8brxr_UZfuSHg,3189
12
+ examples/nba/predictor_transformers_example.py,sha256=Fl4BY_hVW0iYERolN6s-ZB2xv-UxOK547L6iI5t0r0Y,8807
13
13
  examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
15
15
  examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
16
- spforge/__init__.py,sha256=5d9zzBxaaXj2JeBNwfUwuV7Ll5FERHyXONsFiuKhHSQ,402
16
+ spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
17
17
  spforge/autopipeline.py,sha256=ZUwv6Q6O8cD0u5TiSqG6lhW0j16RlSb160AzuOeL2R8,23186
18
18
  spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
19
19
  spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
@@ -43,18 +43,19 @@ spforge/feature_generator/_rolling_mean_binary.py,sha256=lmODy-o9Dd9pb8IlA7g4UyA
43
43
  spforge/feature_generator/_rolling_mean_days.py,sha256=EZQmFmYVQB-JjZV5k8bOWnaTxNpPDCZAjdfdhiiG4r4,8415
44
44
  spforge/feature_generator/_rolling_window.py,sha256=HT8LezsRIPNAlMEoP9oTPW2bKFu55ZSRnQZGST7fncw,8836
45
45
  spforge/feature_generator/_utils.py,sha256=KDn33ia1OYJTK8THFpvc_uRiH_Bl3fImGqqbfzs0YA4,9654
46
- spforge/hyperparameter_tuning/__init__.py,sha256=pp7aWzydObRawFLcGiaUrUduEQIjln2uif9nKCTk6l4,509
47
- spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=19sHW8zlyG88xZdyqSrp9gFI5oLb-f6THlbhYAtTfmY,3534
48
- spforge/hyperparameter_tuning/_tuner.py,sha256=S70IEmHxl36LaUPl_wc_2mo46qUuH8t0eH0aXuCuGfA,9586
46
+ spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8031Tu_rxlxyY,1015
47
+ spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
48
+ spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
49
49
  spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
50
50
  spforge/performance_transformers/_performance_manager.py,sha256=KwAga6dGhNkXi-MDW6LPjwk6VZwCcjo5L--jnk9aio8,9706
51
51
  spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
52
- spforge/ratings/__init__.py,sha256=jAa_xF2e-96FoyD57EYFKE-mO6OnK23siJOB4tzbyek,387
52
+ spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
54
- spforge/ratings/_player_rating.py,sha256=05CuiSa2_uM0xtYpxT00OOxU_TmW4qt6dsXvn7seFss,50861
54
+ spforge/ratings/_player_rating.py,sha256=MyqsyLSY6d7_bxDSnF8eWOyXpSCADWGdepdFSGM4cHw,51365
55
55
  spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
56
56
  spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
57
57
  spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
58
+ spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
58
59
  spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
59
60
  spforge/ratings/start_rating_generator.py,sha256=_7hIJ9KRVCwsCoY1GIzY8cuOdHR8RH_BCMeMwQG3E04,6776
60
61
  spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
@@ -70,15 +71,17 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
70
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
71
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
72
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
73
- spforge-0.8.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
75
  tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
75
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
76
77
  tests/test_feature_generator_pipeline.py,sha256=CAgBknWqawqYi5_hxcPmpxrLVa5elMHVv1VrSVRKXEA,17705
77
78
  tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
78
79
  tests/distributions/test_distribution.py,sha256=aU8hfCgliM80TES4WGjs9KFXpV8XghBGF7Hu9sqEVSE,10982
80
+ tests/end_to_end/test_estimator_hyperparameter_tuning.py,sha256=fZCJ9rrED2vT68B9ovmVA1cIG2pHRTjy9xzZLxxpEBo,2513
81
+ tests/end_to_end/test_league_start_rating_optimizer.py,sha256=Mmct2ixp4c6L7PGym8wZc7E-Csozryt1g4_o6OCc1uI,3141
79
82
  tests/end_to_end/test_lol_player_kills.py,sha256=RJSYUbPrZ-RzSxGggj03yN0JKYeTB1JghVGYFMYia3Y,11891
80
83
  tests/end_to_end/test_nba_player_points.py,sha256=kyzjo7QIcvpteps29Wix6IS_eJG9d1gHLeWtIHpkWMs,9066
81
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=eOsTSVWv16bc0l_nCxH4x8jF-gsmn4Ttfv92mHqSXzc,6303
84
+ tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=LXRkI_6Ho2kzJVbNAM17QFhx_MP9WdDJXCO9dWgJGNA,6491
82
85
  tests/end_to_end/test_nba_prediction_consistency.py,sha256=o3DckJasx_I1ed6MhMYZUo2WSDvQ_p3HtJa9DCWTIYU,9857
83
86
  tests/estimator/test_sklearn_estimator.py,sha256=tVfOP9Wx-tV1b6DcHbGxQHZQzNPA0Iobq8jTcUrk59U,48668
84
87
  tests/feature_generator/test_lag.py,sha256=5Ffrv0V9cwkbkzRMPBe3_c_YNW-W2al-XH_acQIvdeg,19531
@@ -87,13 +90,14 @@ tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7AS
87
90
  tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
88
91
  tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
89
92
  tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
93
+ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
90
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
91
95
  tests/performance_transformers/test_performance_manager.py,sha256=bfC5GiBuzHw-mLmKeEzBUUPuKm0ayax2bsF1j88W8L0,10120
92
96
  tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
93
- tests/ratings/test_player_rating_generator.py,sha256=3mjqlX159QqOlBoY3r_TFkvLwpE4zlLE0fiqpbfk3ps,58547
97
+ tests/ratings/test_player_rating_generator.py,sha256=FGH3Tq0uFoSlkS_XMldsUKhsovBRBvzH9EbqjKvg2O0,59601
94
98
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
95
99
  tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
96
- tests/scorer/test_score.py,sha256=KTrGJypQEpU8tmgJ6LU8wK1SRC3PLUXFzZIyiA-UY7U,71749
100
+ tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
97
101
  tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
98
102
  tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
99
103
  tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -101,7 +105,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
101
105
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
102
106
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
103
107
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
104
- spforge-0.8.4.dist-info/METADATA,sha256=XNaD0lL_puuuYmZU59VjenOYpLSRCSx_nswef8yCZ4M,20219
105
- spforge-0.8.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
106
- spforge-0.8.4.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
107
- spforge-0.8.4.dist-info/RECORD,,
108
+ spforge-0.8.7.dist-info/METADATA,sha256=7vwprmmFvSpEL3lC0HqFZPbzxMi8mRzI0yOsa7pUlNQ,20047
109
+ spforge-0.8.7.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
110
+ spforge-0.8.7.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
111
+ spforge-0.8.7.dist-info/RECORD,,
@@ -0,0 +1,85 @@
1
+ import polars as pl
2
+ from sklearn.linear_model import LogisticRegression
3
+ from sklearn.metrics import mean_absolute_error
4
+
5
+ from examples import get_sub_sample_nba_data
6
+ from spforge import AutoPipeline, ColumnNames, EstimatorHyperparameterTuner, ParamSpec
7
+ from spforge.cross_validator import MatchKFoldCrossValidator
8
+ from spforge.scorer import SklearnScorer
9
+
10
+
11
+ def test_nba_estimator_hyperparameter_tuning__workflow_completes():
12
+ df = get_sub_sample_nba_data(as_polars=True, as_pandas=False)
13
+ column_names = ColumnNames(
14
+ team_id="team_id",
15
+ match_id="game_id",
16
+ start_date="start_date",
17
+ player_id="player_id",
18
+ participation_weight="minutes_ratio",
19
+ )
20
+
21
+ df = df.sort(
22
+ [
23
+ column_names.start_date,
24
+ column_names.match_id,
25
+ column_names.team_id,
26
+ column_names.player_id,
27
+ ]
28
+ )
29
+
30
+ df = df.with_columns(
31
+ [
32
+ (pl.col("minutes") / pl.col("minutes").sum().over("game_id")).alias(
33
+ "minutes_ratio"
34
+ ),
35
+ (pl.col("points") > pl.lit(10)).cast(pl.Int64).alias("points_over_10"),
36
+ ]
37
+ )
38
+
39
+ estimator = AutoPipeline(
40
+ estimator=LogisticRegression(max_iter=200),
41
+ estimator_features=["minutes", "minutes_ratio"],
42
+ )
43
+
44
+ cv = MatchKFoldCrossValidator(
45
+ match_id_column_name=column_names.match_id,
46
+ date_column_name=column_names.start_date,
47
+ target_column="points_over_10",
48
+ estimator=estimator,
49
+ prediction_column_name="points_pred",
50
+ n_splits=2,
51
+ features=estimator.required_features,
52
+ )
53
+
54
+ scorer = SklearnScorer(
55
+ scorer_function=mean_absolute_error,
56
+ pred_column="points_pred",
57
+ target="points_over_10",
58
+ validation_column="is_validation",
59
+ )
60
+
61
+ tuner = EstimatorHyperparameterTuner(
62
+ estimator=estimator,
63
+ cross_validator=cv,
64
+ scorer=scorer,
65
+ direction="minimize",
66
+ param_search_space={
67
+ "C": ParamSpec(
68
+ param_type="float",
69
+ low=0.1,
70
+ high=2.0,
71
+ log=True,
72
+ ),
73
+ },
74
+ n_trials=3,
75
+ show_progress_bar=False,
76
+ )
77
+
78
+ result = tuner.optimize(df)
79
+
80
+ assert result.best_params is not None
81
+ assert isinstance(result.best_params, dict)
82
+ assert "estimator__C" in result.best_params
83
+ assert isinstance(result.best_value, float)
84
+ assert result.best_trial is not None
85
+ assert result.study is not None
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+ import pytest
4
+
5
+ from spforge import ColumnNames
6
+ from spforge.ratings import (
7
+ LeagueStartRatingOptimizer,
8
+ PlayerRatingGenerator,
9
+ TeamRatingGenerator,
10
+ )
11
+
12
+
13
+ def _player_df():
14
+ dates = pd.date_range("2024-01-01", periods=3, freq="D")
15
+ rows = []
16
+ for i, date in enumerate(dates):
17
+ mid = f"M{i}"
18
+ for player_idx in range(2):
19
+ rows.append(
20
+ {
21
+ "pid": f"A{player_idx}",
22
+ "tid": "TA",
23
+ "mid": mid,
24
+ "date": date,
25
+ "league": "LCK",
26
+ "perf": 0.4,
27
+ }
28
+ )
29
+ for player_idx in range(2):
30
+ rows.append(
31
+ {
32
+ "pid": f"B{player_idx}",
33
+ "tid": "TB",
34
+ "mid": mid,
35
+ "date": date,
36
+ "league": "LEC",
37
+ "perf": 0.6,
38
+ }
39
+ )
40
+ return pd.DataFrame(rows)
41
+
42
+
43
+ def _team_df():
44
+ dates = pd.date_range("2024-01-01", periods=3, freq="D")
45
+ rows = []
46
+ for i, date in enumerate(dates):
47
+ mid = f"M{i}"
48
+ rows.extend(
49
+ [
50
+ {
51
+ "tid": "TA",
52
+ "mid": mid,
53
+ "date": date,
54
+ "league": "LCK",
55
+ "perf": 0.4,
56
+ },
57
+ {
58
+ "tid": "TB",
59
+ "mid": mid,
60
+ "date": date,
61
+ "league": "LEC",
62
+ "perf": 0.6,
63
+ },
64
+ ]
65
+ )
66
+ return pd.DataFrame(rows)
67
+
68
+
69
+ @pytest.mark.parametrize("use_polars", [False, True])
70
+ def test_league_start_rating_optimizer__adjusts_player_leagues(use_polars):
71
+ cn = ColumnNames(
72
+ player_id="pid",
73
+ team_id="tid",
74
+ match_id="mid",
75
+ start_date="date",
76
+ league="league",
77
+ )
78
+ df = _player_df()
79
+ if use_polars:
80
+ df = pl.from_pandas(df)
81
+ generator = PlayerRatingGenerator(performance_column="perf", column_names=cn)
82
+ optimizer = LeagueStartRatingOptimizer(
83
+ rating_generator=generator,
84
+ n_iterations=1,
85
+ learning_rate=0.5,
86
+ min_cross_region_rows=1,
87
+ )
88
+
89
+ result = optimizer.optimize(df)
90
+
91
+ assert result.league_ratings["LCK"] < 1000
92
+ assert result.league_ratings["LEC"] > 1000
93
+
94
+
95
+ @pytest.mark.parametrize("use_polars", [False, True])
96
+ def test_league_start_rating_optimizer__adjusts_team_leagues(use_polars):
97
+ cn = ColumnNames(
98
+ team_id="tid",
99
+ match_id="mid",
100
+ start_date="date",
101
+ league="league",
102
+ )
103
+ df = _team_df()
104
+ if use_polars:
105
+ df = pl.from_pandas(df)
106
+ generator = TeamRatingGenerator(performance_column="perf", column_names=cn)
107
+ optimizer = LeagueStartRatingOptimizer(
108
+ rating_generator=generator,
109
+ n_iterations=1,
110
+ learning_rate=0.5,
111
+ min_cross_region_rows=1,
112
+ )
113
+
114
+ result = optimizer.optimize(df)
115
+
116
+ assert result.league_ratings["LCK"] < 1000
117
+ assert result.league_ratings["LEC"] > 1000
@@ -97,6 +97,11 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
97
97
  "confidence_max_sum",
98
98
  "use_off_def_split",
99
99
  "performance_predictor",
100
+ "start_team_weight",
101
+ "start_league_quantile",
102
+ "start_min_count_for_percentiles",
103
+ "start_min_match_count_team_rating",
104
+ "start_team_rating_subtract",
100
105
  }
101
106
  assert set(result.best_params.keys()) == expected_params
102
107
 
@@ -0,0 +1,167 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import pytest
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.linear_model import LogisticRegression
6
+
7
+ from spforge import EstimatorHyperparameterTuner, ParamSpec
8
+ from spforge.cross_validator import MatchKFoldCrossValidator
9
+ from spforge.estimator import SkLearnEnhancerEstimator
10
+ from spforge.scorer import MeanBiasScorer
11
+
12
+
13
+ class FakeLGBMClassifier(BaseEstimator):
14
+ __module__ = "lightgbm.sklearn"
15
+
16
+ def __init__(
17
+ self,
18
+ n_estimators: int = 100,
19
+ num_leaves: int = 31,
20
+ max_depth: int = 5,
21
+ min_child_samples: int = 20,
22
+ subsample: float = 1.0,
23
+ subsample_freq: int = 1,
24
+ reg_alpha: float = 0.0,
25
+ reg_lambda: float = 0.0,
26
+ ):
27
+ self.n_estimators = n_estimators
28
+ self.num_leaves = num_leaves
29
+ self.max_depth = max_depth
30
+ self.min_child_samples = min_child_samples
31
+ self.subsample = subsample
32
+ self.subsample_freq = subsample_freq
33
+ self.reg_alpha = reg_alpha
34
+ self.reg_lambda = reg_lambda
35
+
36
+ def fit(self, X, y):
37
+ self.classes_ = np.unique(y)
38
+ return self
39
+
40
+ def predict_proba(self, X):
41
+ n = len(X)
42
+ if len(self.classes_) < 2:
43
+ return np.ones((n, 1))
44
+ return np.tile([0.4, 0.6], (n, 1))
45
+
46
+ def predict(self, X):
47
+ n = len(X)
48
+ if len(self.classes_) == 1:
49
+ return np.full(n, self.classes_[0])
50
+ proba = self.predict_proba(X)
51
+ idx = np.argmax(proba, axis=1)
52
+ return np.array(self.classes_)[idx]
53
+
54
+
55
+ @pytest.fixture
56
+ def sample_df():
57
+ dates = pd.date_range("2024-01-01", periods=12, freq="D")
58
+ rows = []
59
+ for i, date in enumerate(dates):
60
+ rows.append(
61
+ {
62
+ "mid": f"M{i // 2}",
63
+ "date": date,
64
+ "x1": float(i),
65
+ "y": 1 if i % 2 == 0 else 0,
66
+ }
67
+ )
68
+ return pd.DataFrame(rows)
69
+
70
+
71
+ @pytest.fixture
72
+ def scorer():
73
+ return MeanBiasScorer(
74
+ pred_column="y_pred",
75
+ target="y",
76
+ validation_column="is_validation",
77
+ )
78
+
79
+
80
+ def test_estimator_tuner_requires_search_space(sample_df, scorer):
81
+ estimator = LogisticRegression()
82
+
83
+ cv = MatchKFoldCrossValidator(
84
+ match_id_column_name="mid",
85
+ date_column_name="date",
86
+ target_column="y",
87
+ estimator=estimator,
88
+ prediction_column_name="y_pred",
89
+ n_splits=2,
90
+ features=["x1"],
91
+ )
92
+
93
+ tuner = EstimatorHyperparameterTuner(
94
+ estimator=estimator,
95
+ cross_validator=cv,
96
+ scorer=scorer,
97
+ direction="minimize",
98
+ n_trials=2,
99
+ show_progress_bar=False,
100
+ )
101
+
102
+ with pytest.raises(ValueError, match="param_search_space is required"):
103
+ tuner.optimize(sample_df)
104
+
105
+
106
+ def test_estimator_tuner_custom_search_space(sample_df, scorer):
107
+ estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
108
+
109
+ cv = MatchKFoldCrossValidator(
110
+ match_id_column_name="mid",
111
+ date_column_name="date",
112
+ target_column="y",
113
+ estimator=estimator,
114
+ prediction_column_name="y_pred",
115
+ n_splits=2,
116
+ features=["x1"],
117
+ )
118
+
119
+ tuner = EstimatorHyperparameterTuner(
120
+ estimator=estimator,
121
+ cross_validator=cv,
122
+ scorer=scorer,
123
+ direction="minimize",
124
+ param_search_space={
125
+ "C": ParamSpec(
126
+ param_type="float",
127
+ low=0.1,
128
+ high=2.0,
129
+ log=True,
130
+ )
131
+ },
132
+ n_trials=2,
133
+ show_progress_bar=False,
134
+ )
135
+
136
+ result = tuner.optimize(sample_df)
137
+
138
+ assert "estimator__C" in result.best_params
139
+ assert isinstance(result.best_value, float)
140
+
141
+
142
+ def test_estimator_tuner_lgbm_defaults(sample_df, scorer):
143
+ estimator = FakeLGBMClassifier()
144
+
145
+ cv = MatchKFoldCrossValidator(
146
+ match_id_column_name="mid",
147
+ date_column_name="date",
148
+ target_column="y",
149
+ estimator=estimator,
150
+ prediction_column_name="y_pred",
151
+ n_splits=2,
152
+ features=["x1"],
153
+ )
154
+
155
+ tuner = EstimatorHyperparameterTuner(
156
+ estimator=estimator,
157
+ cross_validator=cv,
158
+ scorer=scorer,
159
+ direction="minimize",
160
+ n_trials=2,
161
+ show_progress_bar=False,
162
+ )
163
+
164
+ result = tuner.optimize(sample_df)
165
+
166
+ assert "n_estimators" in result.best_params
167
+ assert isinstance(result.best_value, float)
@@ -1662,3 +1662,30 @@ def test_player_rating_team_with_strong_offense_and_weak_defense_gets_expected_r
1662
1662
 
1663
1663
  assert a_off > start_rating
1664
1664
  assert a_def < start_rating
1665
+
1666
+
1667
+ def test_fit_transform__player_rating_difference_from_team_projected_feature(base_cn, sample_df):
1668
+ """PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED computes player_off_rating - team_off_rating_projected."""
1669
+ gen = PlayerRatingGenerator(
1670
+ performance_column="perf",
1671
+ column_names=base_cn,
1672
+ auto_scale_performance=True,
1673
+ features_out=[
1674
+ RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED,
1675
+ RatingKnownFeatures.PLAYER_OFF_RATING,
1676
+ RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED,
1677
+ ],
1678
+ )
1679
+ result = gen.fit_transform(sample_df)
1680
+
1681
+ diff_col = "player_rating_difference_from_team_projected_perf"
1682
+ player_col = "player_off_rating_perf"
1683
+ team_col = "team_off_rating_projected_perf"
1684
+
1685
+ assert diff_col in result.columns
1686
+ assert player_col in result.columns
1687
+ assert team_col in result.columns
1688
+
1689
+ for row in result.iter_rows(named=True):
1690
+ expected = row[player_col] - row[team_col]
1691
+ assert row[diff_col] == pytest.approx(expected, rel=1e-9)