spforge 0.8.5__tar.gz → 0.8.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.5/spforge.egg-info → spforge-0.8.7}/PKG-INFO +11 -18
- {spforge-0.8.5 → spforge-0.8.7}/README.md +10 -17
- spforge-0.8.7/examples/lol/pipeline_transformer_example.py +106 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/cross_validation_example.py +4 -11
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/feature_engineering_example.py +33 -15
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/game_winner_example.py +24 -14
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/predictor_transformers_example.py +29 -16
- {spforge-0.8.5 → spforge-0.8.7}/pyproject.toml +1 -1
- {spforge-0.8.5 → spforge-0.8.7}/spforge/hyperparameter_tuning/_default_search_spaces.py +26 -1
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/__init__.py +4 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/_player_rating.py +11 -0
- spforge-0.8.7/spforge/ratings/league_start_rating_optimizer.py +201 -0
- {spforge-0.8.5 → spforge-0.8.7/spforge.egg-info}/PKG-INFO +11 -18
- {spforge-0.8.5 → spforge-0.8.7}/spforge.egg-info/SOURCES.txt +2 -0
- spforge-0.8.7/tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/ratings/test_player_rating_generator.py +27 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/scorer/test_score.py +90 -0
- spforge-0.8.5/examples/lol/pipeline_transformer_example.py +0 -123
- {spforge-0.8.5 → spforge-0.8.7}/LICENSE +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/MANIFEST.in +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/game_level_example.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/lol/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/setup.cfg +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/autopipeline.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/data_structures.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_group_by_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/ratings/utils.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/scorer/_score.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_other_transformer.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge/utils.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/test_autopipeline.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.5 → spforge-0.8.7}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.7
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
|
|
|
85
85
|
import pandas as pd
|
|
86
86
|
from sklearn.linear_model import LogisticRegression
|
|
87
87
|
|
|
88
|
+
from examples import get_sub_sample_nba_data
|
|
88
89
|
from spforge.autopipeline import AutoPipeline
|
|
89
90
|
from spforge.data_structures import ColumnNames
|
|
90
|
-
from spforge.ratings import RatingKnownFeatures
|
|
91
|
-
from spforge.ratings._player_rating import PlayerRatingGenerator
|
|
91
|
+
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
92
92
|
|
|
93
|
-
df =
|
|
93
|
+
df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
|
|
94
94
|
|
|
95
95
|
# Step 1: Define column mappings for your dataset
|
|
96
96
|
column_names = ColumnNames(
|
|
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
|
|
|
144
144
|
pipeline = AutoPipeline(
|
|
145
145
|
estimator=LogisticRegression(),
|
|
146
146
|
granularity=["game_id", "team_id"], # Aggregate players → teams
|
|
147
|
-
|
|
147
|
+
estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
# Train on historical data
|
|
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
|
|
|
302
302
|
prediction_column_name="points_pred",
|
|
303
303
|
target_column="points",
|
|
304
304
|
n_splits=3, # Number of temporal folds
|
|
305
|
-
# Must include both
|
|
306
|
-
features=pipeline.
|
|
305
|
+
# Must include both estimator features and context features
|
|
306
|
+
features=pipeline.required_features,
|
|
307
307
|
)
|
|
308
308
|
|
|
309
309
|
# Generate validation predictions
|
|
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
|
|
|
330
330
|
- `is_validation=1` marks validation rows, `is_validation=0` marks training rows
|
|
331
331
|
- Use `validation_column` in scorer to score only validation rows
|
|
332
332
|
- Training data always comes BEFORE validation data chronologically
|
|
333
|
-
- Must pass
|
|
333
|
+
- Must pass all required features (use `pipeline.required_features`)
|
|
334
334
|
- Scorers can filter rows (e.g., only score players who played minutes > 0)
|
|
335
335
|
|
|
336
336
|
See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
|
|
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
|
|
|
371
371
|
# Approach 1: LGBMClassifier (direct probability prediction)
|
|
372
372
|
pipeline_classifier = AutoPipeline(
|
|
373
373
|
estimator=LGBMClassifier(verbose=-100, random_state=42),
|
|
374
|
-
|
|
374
|
+
estimator_features=features_pipeline.features_out,
|
|
375
375
|
)
|
|
376
376
|
|
|
377
377
|
# Approach 2: LGBMRegressor + NegativeBinomialEstimator
|
|
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(
|
|
|
385
385
|
|
|
386
386
|
pipeline_negbin = AutoPipeline(
|
|
387
387
|
estimator=distribution_estimator,
|
|
388
|
-
|
|
389
|
-
context_feature_names=[
|
|
390
|
-
column_names.player_id,
|
|
391
|
-
column_names.start_date,
|
|
392
|
-
column_names.team_id,
|
|
393
|
-
column_names.match_id,
|
|
394
|
-
],
|
|
388
|
+
estimator_features=features_pipeline.features_out,
|
|
395
389
|
predictor_transformers=[
|
|
396
390
|
EstimatorTransformer(
|
|
397
391
|
prediction_column_name="points_estimate",
|
|
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
|
|
|
439
433
|
# Stage 2: Refine estimate using Stage 1 output
|
|
440
434
|
player_points_pipeline = AutoPipeline(
|
|
441
435
|
estimator=LGBMRegressor(verbose=-100, n_estimators=50),
|
|
442
|
-
|
|
436
|
+
estimator_features=features_pipeline.features_out, # Original features
|
|
443
437
|
# predictor_transformers execute first, adding their predictions
|
|
444
438
|
predictor_transformers=[points_estimate_transformer],
|
|
445
439
|
)
|
|
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
|
|
|
474
468
|
- **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
|
|
475
469
|
- **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
|
|
476
470
|
- **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
|
|
477
|
-
|
|
@@ -57,12 +57,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
|
|
|
57
57
|
import pandas as pd
|
|
58
58
|
from sklearn.linear_model import LogisticRegression
|
|
59
59
|
|
|
60
|
+
from examples import get_sub_sample_nba_data
|
|
60
61
|
from spforge.autopipeline import AutoPipeline
|
|
61
62
|
from spforge.data_structures import ColumnNames
|
|
62
|
-
from spforge.ratings import RatingKnownFeatures
|
|
63
|
-
from spforge.ratings._player_rating import PlayerRatingGenerator
|
|
63
|
+
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
64
64
|
|
|
65
|
-
df =
|
|
65
|
+
df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
|
|
66
66
|
|
|
67
67
|
# Step 1: Define column mappings for your dataset
|
|
68
68
|
column_names = ColumnNames(
|
|
@@ -116,7 +116,7 @@ historical_df = rating_generator.fit_transform(historical_df)
|
|
|
116
116
|
pipeline = AutoPipeline(
|
|
117
117
|
estimator=LogisticRegression(),
|
|
118
118
|
granularity=["game_id", "team_id"], # Aggregate players → teams
|
|
119
|
-
|
|
119
|
+
estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
|
|
120
120
|
)
|
|
121
121
|
|
|
122
122
|
# Train on historical data
|
|
@@ -274,8 +274,8 @@ cross_validator = MatchKFoldCrossValidator(
|
|
|
274
274
|
prediction_column_name="points_pred",
|
|
275
275
|
target_column="points",
|
|
276
276
|
n_splits=3, # Number of temporal folds
|
|
277
|
-
# Must include both
|
|
278
|
-
features=pipeline.
|
|
277
|
+
# Must include both estimator features and context features
|
|
278
|
+
features=pipeline.required_features,
|
|
279
279
|
)
|
|
280
280
|
|
|
281
281
|
# Generate validation predictions
|
|
@@ -302,7 +302,7 @@ print(f"Validation MAE: {mae:.2f}")
|
|
|
302
302
|
- `is_validation=1` marks validation rows, `is_validation=0` marks training rows
|
|
303
303
|
- Use `validation_column` in scorer to score only validation rows
|
|
304
304
|
- Training data always comes BEFORE validation data chronologically
|
|
305
|
-
- Must pass
|
|
305
|
+
- Must pass all required features (use `pipeline.required_features`)
|
|
306
306
|
- Scorers can filter rows (e.g., only score players who played minutes > 0)
|
|
307
307
|
|
|
308
308
|
See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
|
|
@@ -343,7 +343,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
|
|
|
343
343
|
# Approach 1: LGBMClassifier (direct probability prediction)
|
|
344
344
|
pipeline_classifier = AutoPipeline(
|
|
345
345
|
estimator=LGBMClassifier(verbose=-100, random_state=42),
|
|
346
|
-
|
|
346
|
+
estimator_features=features_pipeline.features_out,
|
|
347
347
|
)
|
|
348
348
|
|
|
349
349
|
# Approach 2: LGBMRegressor + NegativeBinomialEstimator
|
|
@@ -357,13 +357,7 @@ distribution_estimator = NegativeBinomialEstimator(
|
|
|
357
357
|
|
|
358
358
|
pipeline_negbin = AutoPipeline(
|
|
359
359
|
estimator=distribution_estimator,
|
|
360
|
-
|
|
361
|
-
context_feature_names=[
|
|
362
|
-
column_names.player_id,
|
|
363
|
-
column_names.start_date,
|
|
364
|
-
column_names.team_id,
|
|
365
|
-
column_names.match_id,
|
|
366
|
-
],
|
|
360
|
+
estimator_features=features_pipeline.features_out,
|
|
367
361
|
predictor_transformers=[
|
|
368
362
|
EstimatorTransformer(
|
|
369
363
|
prediction_column_name="points_estimate",
|
|
@@ -411,7 +405,7 @@ points_estimate_transformer = EstimatorTransformer(
|
|
|
411
405
|
# Stage 2: Refine estimate using Stage 1 output
|
|
412
406
|
player_points_pipeline = AutoPipeline(
|
|
413
407
|
estimator=LGBMRegressor(verbose=-100, n_estimators=50),
|
|
414
|
-
|
|
408
|
+
estimator_features=features_pipeline.features_out, # Original features
|
|
415
409
|
# predictor_transformers execute first, adding their predictions
|
|
416
410
|
predictor_transformers=[points_estimate_transformer],
|
|
417
411
|
)
|
|
@@ -446,4 +440,3 @@ For complete, runnable examples with detailed explanations:
|
|
|
446
440
|
- **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
|
|
447
441
|
- **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
|
|
448
442
|
- **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
|
|
449
|
-
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
from lightgbm import LGBMRegressor
|
|
3
|
+
|
|
4
|
+
from examples import get_sub_sample_lol_data
|
|
5
|
+
from spforge import AutoPipeline, ColumnNames, FeatureGeneratorPipeline
|
|
6
|
+
from spforge.distributions import NegativeBinomialEstimator
|
|
7
|
+
from spforge.feature_generator import LagTransformer, RollingWindowTransformer
|
|
8
|
+
from spforge.transformers import EstimatorTransformer
|
|
9
|
+
|
|
10
|
+
column_names = ColumnNames(
|
|
11
|
+
team_id="teamname",
|
|
12
|
+
match_id="gameid",
|
|
13
|
+
start_date="date",
|
|
14
|
+
player_id="player_uid",
|
|
15
|
+
league="league",
|
|
16
|
+
position="position",
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
df = get_sub_sample_lol_data(as_pandas=False, as_polars=True)
|
|
20
|
+
df = (
|
|
21
|
+
df.with_columns(
|
|
22
|
+
pl.concat_str([pl.col("playername"), pl.col("teamname")], separator="__").alias(
|
|
23
|
+
column_names.player_id
|
|
24
|
+
)
|
|
25
|
+
)
|
|
26
|
+
.filter(pl.col(column_names.position) != "team")
|
|
27
|
+
.with_columns(
|
|
28
|
+
pl.col(column_names.team_id)
|
|
29
|
+
.n_unique()
|
|
30
|
+
.over(column_names.match_id)
|
|
31
|
+
.alias("team_count"),
|
|
32
|
+
pl.col(column_names.player_id)
|
|
33
|
+
.n_unique()
|
|
34
|
+
.over([column_names.match_id, column_names.team_id])
|
|
35
|
+
.alias("player_count"),
|
|
36
|
+
)
|
|
37
|
+
.filter((pl.col("team_count") == 2) & (pl.col("player_count") == 5))
|
|
38
|
+
.drop(["team_count", "player_count"])
|
|
39
|
+
.unique(subset=[column_names.match_id, column_names.player_id, column_names.team_id])
|
|
40
|
+
.sort(
|
|
41
|
+
[
|
|
42
|
+
column_names.start_date,
|
|
43
|
+
column_names.match_id,
|
|
44
|
+
column_names.team_id,
|
|
45
|
+
column_names.player_id,
|
|
46
|
+
]
|
|
47
|
+
)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
most_recent_10_games = (
|
|
51
|
+
df.select(pl.col(column_names.match_id))
|
|
52
|
+
.unique(maintain_order=True)
|
|
53
|
+
.tail(10)
|
|
54
|
+
.get_column(column_names.match_id)
|
|
55
|
+
.to_list()
|
|
56
|
+
)
|
|
57
|
+
historical_df = df.filter(~pl.col(column_names.match_id).is_in(most_recent_10_games))
|
|
58
|
+
future_df = df.filter(pl.col(column_names.match_id).is_in(most_recent_10_games)).drop("kills")
|
|
59
|
+
|
|
60
|
+
lag_transformers = [
|
|
61
|
+
LagTransformer(features=["kills", "deaths"], lag_length=3, granularity=["player_uid"]),
|
|
62
|
+
RollingWindowTransformer(
|
|
63
|
+
features=["kills", "deaths"],
|
|
64
|
+
window=20,
|
|
65
|
+
min_periods=1,
|
|
66
|
+
granularity=["player_uid"],
|
|
67
|
+
),
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
features_generator = FeatureGeneratorPipeline(
|
|
71
|
+
column_names=column_names,
|
|
72
|
+
feature_generators=lag_transformers,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
historical_df = features_generator.fit_transform(historical_df).to_pandas()
|
|
76
|
+
future_df = features_generator.future_transform(future_df).to_pandas()
|
|
77
|
+
|
|
78
|
+
point_estimate_transformer = EstimatorTransformer(
|
|
79
|
+
prediction_column_name="kills_estimate",
|
|
80
|
+
estimator=LGBMRegressor(verbose=-100, random_state=42),
|
|
81
|
+
features=features_generator.features_out,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
probability_estimator = NegativeBinomialEstimator(
|
|
85
|
+
max_value=15,
|
|
86
|
+
point_estimate_pred_column="kills_estimate",
|
|
87
|
+
r_specific_granularity=[column_names.player_id],
|
|
88
|
+
predicted_r_weight=1,
|
|
89
|
+
column_names=column_names,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
pipeline = AutoPipeline(
|
|
93
|
+
estimator=probability_estimator,
|
|
94
|
+
estimator_features=features_generator.features_out,
|
|
95
|
+
predictor_transformers=[point_estimate_transformer],
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
pipeline.fit(X=historical_df, y=historical_df["kills"])
|
|
99
|
+
|
|
100
|
+
future_point_estimates = pipeline.predict(future_df)
|
|
101
|
+
future_probabilities = pipeline.predict_proba(future_df)
|
|
102
|
+
future_df["kills_pred"] = future_point_estimates
|
|
103
|
+
|
|
104
|
+
print(future_df.head(5))
|
|
105
|
+
print(f"Probability matrix shape: {future_probabilities.shape}")
|
|
106
|
+
print(f"First row probabilities (0-15 kills): {future_probabilities[0]}")
|
|
@@ -51,7 +51,7 @@ print("\nApproach 1: LGBMClassifier (direct probability prediction)")
|
|
|
51
51
|
print("-" * 70)
|
|
52
52
|
pipeline_classifier = AutoPipeline(
|
|
53
53
|
estimator=LGBMClassifier(verbose=-100, random_state=42),
|
|
54
|
-
|
|
54
|
+
estimator_features=features_generator.features_out,
|
|
55
55
|
)
|
|
56
56
|
|
|
57
57
|
cross_validator_classifier = MatchKFoldCrossValidator(
|
|
@@ -60,7 +60,7 @@ cross_validator_classifier = MatchKFoldCrossValidator(
|
|
|
60
60
|
estimator=pipeline_classifier,
|
|
61
61
|
prediction_column_name="points_probabilities_classifier",
|
|
62
62
|
target_column="points",
|
|
63
|
-
features=pipeline_classifier.
|
|
63
|
+
features=pipeline_classifier.required_features,
|
|
64
64
|
)
|
|
65
65
|
validation_df_classifier = cross_validator_classifier.generate_validation_df(df=df)
|
|
66
66
|
|
|
@@ -80,20 +80,13 @@ print("-" * 70)
|
|
|
80
80
|
predictor_negbin = NegativeBinomialEstimator(
|
|
81
81
|
max_value=40,
|
|
82
82
|
point_estimate_pred_column="points_estimate",
|
|
83
|
-
r_specific_granularity=["player_id"],
|
|
84
83
|
predicted_r_weight=1,
|
|
85
84
|
column_names=column_names,
|
|
86
85
|
)
|
|
87
86
|
|
|
88
87
|
pipeline_negbin = AutoPipeline(
|
|
89
88
|
estimator=predictor_negbin,
|
|
90
|
-
|
|
91
|
-
context_feature_names=[
|
|
92
|
-
column_names.player_id,
|
|
93
|
-
column_names.start_date,
|
|
94
|
-
column_names.team_id,
|
|
95
|
-
column_names.match_id,
|
|
96
|
-
],
|
|
89
|
+
estimator_features=features_generator.features_out,
|
|
97
90
|
predictor_transformers=[
|
|
98
91
|
EstimatorTransformer(
|
|
99
92
|
prediction_column_name="points_estimate",
|
|
@@ -109,7 +102,7 @@ cross_validator_negbin = MatchKFoldCrossValidator(
|
|
|
109
102
|
estimator=pipeline_negbin,
|
|
110
103
|
prediction_column_name="points_probabilities_negbin",
|
|
111
104
|
target_column="points",
|
|
112
|
-
features=pipeline_negbin.
|
|
105
|
+
features=pipeline_negbin.required_features,
|
|
113
106
|
)
|
|
114
107
|
validation_df_negbin = cross_validator_negbin.generate_validation_df(df=df)
|
|
115
108
|
|
|
@@ -13,7 +13,7 @@ Key concepts covered:
|
|
|
13
13
|
- State management: fit_transform vs future_transform
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
import
|
|
16
|
+
import polars as pl
|
|
17
17
|
|
|
18
18
|
from examples import get_sub_sample_nba_data
|
|
19
19
|
from spforge import FeatureGeneratorPipeline
|
|
@@ -22,7 +22,7 @@ from spforge.feature_generator import LagTransformer, RollingWindowTransformer
|
|
|
22
22
|
from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
23
23
|
|
|
24
24
|
# Load sample NBA data
|
|
25
|
-
df = get_sub_sample_nba_data(as_pandas=
|
|
25
|
+
df = get_sub_sample_nba_data(as_pandas=False, as_polars=True)
|
|
26
26
|
|
|
27
27
|
# Define column mappings for your dataset
|
|
28
28
|
# This tells spforge which columns contain team IDs, player IDs, dates, etc.
|
|
@@ -35,7 +35,7 @@ column_names = ColumnNames(
|
|
|
35
35
|
|
|
36
36
|
# CRITICAL: Always sort data chronologically before generating features
|
|
37
37
|
# This ensures temporal ordering and prevents future leakage (using future data to predict the past)
|
|
38
|
-
df = df.
|
|
38
|
+
df = df.sort(
|
|
39
39
|
[
|
|
40
40
|
column_names.start_date, # First by date
|
|
41
41
|
column_names.match_id, # Then by match
|
|
@@ -46,13 +46,21 @@ df = df.sort_values(
|
|
|
46
46
|
|
|
47
47
|
# Keep only games with exactly 2 teams (filter out invalid data)
|
|
48
48
|
df = (
|
|
49
|
-
df.
|
|
50
|
-
|
|
51
|
-
|
|
49
|
+
df.with_columns(
|
|
50
|
+
pl.col(column_names.team_id)
|
|
51
|
+
.n_unique()
|
|
52
|
+
.over(column_names.match_id)
|
|
53
|
+
.alias("team_count")
|
|
54
|
+
)
|
|
55
|
+
.filter(pl.col("team_count") == 2)
|
|
56
|
+
.drop("team_count")
|
|
52
57
|
)
|
|
53
58
|
|
|
54
|
-
|
|
55
|
-
|
|
59
|
+
match_count = df.select(pl.col(column_names.match_id).n_unique()).to_series().item()
|
|
60
|
+
start_date = df.select(pl.col(column_names.start_date).min()).to_series().item()
|
|
61
|
+
end_date = df.select(pl.col(column_names.start_date).max()).to_series().item()
|
|
62
|
+
print(f"Dataset: {len(df)} rows, {match_count} games")
|
|
63
|
+
print(f"Date range: {start_date} to {end_date}")
|
|
56
64
|
print()
|
|
57
65
|
|
|
58
66
|
# ====================================================================
|
|
@@ -125,12 +133,22 @@ print()
|
|
|
125
133
|
# ====================================================================
|
|
126
134
|
|
|
127
135
|
# Split data into historical (for training) and future (for prediction)
|
|
128
|
-
most_recent_5_games =
|
|
129
|
-
|
|
130
|
-
|
|
136
|
+
most_recent_5_games = (
|
|
137
|
+
df.select(pl.col(column_names.match_id))
|
|
138
|
+
.unique(maintain_order=True)
|
|
139
|
+
.tail(5)
|
|
140
|
+
.get_column(column_names.match_id)
|
|
141
|
+
.to_list()
|
|
142
|
+
)
|
|
143
|
+
historical_df = df.filter(~pl.col(column_names.match_id).is_in(most_recent_5_games))
|
|
144
|
+
future_df = df.filter(pl.col(column_names.match_id).is_in(most_recent_5_games))
|
|
131
145
|
|
|
132
|
-
|
|
133
|
-
|
|
146
|
+
historical_games = (
|
|
147
|
+
historical_df.select(pl.col(column_names.match_id).n_unique()).to_series().item()
|
|
148
|
+
)
|
|
149
|
+
future_games = future_df.select(pl.col(column_names.match_id).n_unique()).to_series().item()
|
|
150
|
+
print(f"Historical data: {len(historical_df)} rows, {historical_games} games")
|
|
151
|
+
print(f"Future data: {len(future_df)} rows, {future_games} games")
|
|
134
152
|
print()
|
|
135
153
|
|
|
136
154
|
# FIT_TRANSFORM: Learn from historical data
|
|
@@ -138,7 +156,7 @@ print()
|
|
|
138
156
|
# - Lags/rolling windows build up from initial games
|
|
139
157
|
# - Internal state (ratings, windows) is MUTATED
|
|
140
158
|
print("Applying fit_transform to historical data...")
|
|
141
|
-
historical_df = features_pipeline.fit_transform(historical_df)
|
|
159
|
+
historical_df = features_pipeline.fit_transform(historical_df).to_pandas()
|
|
142
160
|
print(f" Generated {len(features_pipeline.features_out)} features:")
|
|
143
161
|
for feature in features_pipeline.features_out:
|
|
144
162
|
print(f" - {feature}")
|
|
@@ -149,7 +167,7 @@ print()
|
|
|
149
167
|
# - Appends current game to lag/rolling windows but doesn't persist the update
|
|
150
168
|
# - This is what you use in production: generate features without affecting your model's state
|
|
151
169
|
print("Applying future_transform to future data (read-only)...")
|
|
152
|
-
future_df_transformed = features_pipeline.future_transform(future_df)
|
|
170
|
+
future_df_transformed = features_pipeline.future_transform(future_df).to_pandas()
|
|
153
171
|
print(f" Future data now has {len(future_df_transformed.columns)} columns")
|
|
154
172
|
print()
|
|
155
173
|
|
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import
|
|
1
|
+
import polars as pl
|
|
2
2
|
from sklearn.linear_model import LogisticRegression
|
|
3
3
|
|
|
4
|
+
from examples import get_sub_sample_nba_data
|
|
4
5
|
from spforge.autopipeline import AutoPipeline
|
|
5
6
|
from spforge.data_structures import ColumnNames
|
|
6
7
|
from spforge.ratings import RatingKnownFeatures
|
|
7
8
|
from spforge.ratings._player_rating import PlayerRatingGenerator
|
|
8
9
|
|
|
9
|
-
df =
|
|
10
|
+
df = get_sub_sample_nba_data(as_pandas=False, as_polars=True)
|
|
10
11
|
|
|
11
12
|
# Defines the column names as they appear in the dataframe
|
|
12
13
|
column_names = ColumnNames(
|
|
@@ -16,8 +17,8 @@ column_names = ColumnNames(
|
|
|
16
17
|
player_id="player_name",
|
|
17
18
|
)
|
|
18
19
|
# Sorts the dataframe. The dataframe must always be sorted as below
|
|
19
|
-
df = df.
|
|
20
|
-
|
|
20
|
+
df = df.sort(
|
|
21
|
+
[
|
|
21
22
|
column_names.start_date,
|
|
22
23
|
column_names.match_id,
|
|
23
24
|
column_names.team_id,
|
|
@@ -27,17 +28,26 @@ df = df.sort_values(
|
|
|
27
28
|
|
|
28
29
|
# Drops games with less or more than 2 teams
|
|
29
30
|
df = (
|
|
30
|
-
df.
|
|
31
|
-
|
|
31
|
+
df.with_columns(
|
|
32
|
+
pl.col(column_names.team_id)
|
|
33
|
+
.n_unique()
|
|
34
|
+
.over(column_names.match_id)
|
|
35
|
+
.alias("team_count")
|
|
32
36
|
)
|
|
33
|
-
.
|
|
34
|
-
.drop(
|
|
37
|
+
.filter(pl.col("team_count") == 2)
|
|
38
|
+
.drop("team_count")
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
# Pretends the last 10 games are future games. The model will be trained on everything before that.
|
|
38
|
-
most_recent_10_games =
|
|
39
|
-
|
|
40
|
-
|
|
42
|
+
most_recent_10_games = (
|
|
43
|
+
df.select(pl.col(column_names.match_id))
|
|
44
|
+
.unique(maintain_order=True)
|
|
45
|
+
.tail(10)
|
|
46
|
+
.get_column(column_names.match_id)
|
|
47
|
+
.to_list()
|
|
48
|
+
)
|
|
49
|
+
historical_df = df.filter(~pl.col(column_names.match_id).is_in(most_recent_10_games))
|
|
50
|
+
future_df = df.filter(pl.col(column_names.match_id).is_in(most_recent_10_games)).drop("won")
|
|
41
51
|
|
|
42
52
|
# Defining a simple rating-generator. It will use the "won" column to update the ratings.
|
|
43
53
|
# In contrast to a typical Elo, ratings will follow players.
|
|
@@ -49,7 +59,7 @@ rating_generator = PlayerRatingGenerator(
|
|
|
49
59
|
column_names=column_names,
|
|
50
60
|
non_predictor_features_out=[RatingKnownFeatures.PLAYER_RATING],
|
|
51
61
|
)
|
|
52
|
-
historical_df = rating_generator.fit_transform(historical_df)
|
|
62
|
+
historical_df = rating_generator.fit_transform(historical_df).to_pandas()
|
|
53
63
|
|
|
54
64
|
# Defines the predictor. A machine-learning model will be used to predict game winner on a game-team-level.
|
|
55
65
|
# Mean team-ratings will be calculated (from player-level) and rating-difference between the 2 teams calculated.
|
|
@@ -61,13 +71,13 @@ historical_df = rating_generator.fit_transform(historical_df)
|
|
|
61
71
|
pipeline = AutoPipeline(
|
|
62
72
|
estimator=LogisticRegression(),
|
|
63
73
|
granularity=["game_id", "team_id"],
|
|
64
|
-
|
|
74
|
+
estimator_features=rating_generator.features_out + ["location"],
|
|
65
75
|
)
|
|
66
76
|
|
|
67
77
|
pipeline.fit(X=historical_df, y=historical_df["won"])
|
|
68
78
|
|
|
69
79
|
# Future predictions on future results
|
|
70
|
-
future_df = rating_generator.future_transform(future_df)
|
|
80
|
+
future_df = rating_generator.future_transform(future_df).to_pandas()
|
|
71
81
|
future_predictions = pipeline.predict_proba(future_df)[:, 1]
|
|
72
82
|
future_df["game_winner_probability"] = future_predictions
|
|
73
83
|
# Grouping predictions from game-player level to game-level.
|
|
@@ -12,7 +12,7 @@ Key concepts covered:
|
|
|
12
12
|
- Hierarchical modeling: Team strength → Player performance
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
import
|
|
15
|
+
import polars as pl
|
|
16
16
|
from lightgbm import LGBMRegressor
|
|
17
17
|
from sklearn.linear_model import LogisticRegression
|
|
18
18
|
|
|
@@ -24,7 +24,7 @@ from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
|
|
|
24
24
|
from spforge.transformers import EstimatorTransformer
|
|
25
25
|
|
|
26
26
|
# Load sample NBA data
|
|
27
|
-
df = get_sub_sample_nba_data(as_pandas=
|
|
27
|
+
df = get_sub_sample_nba_data(as_pandas=False, as_polars=True)
|
|
28
28
|
|
|
29
29
|
# Define column mappings
|
|
30
30
|
column_names = ColumnNames(
|
|
@@ -35,7 +35,7 @@ column_names = ColumnNames(
|
|
|
35
35
|
)
|
|
36
36
|
|
|
37
37
|
# Sort data chronologically (critical for temporal correctness)
|
|
38
|
-
df = df.
|
|
38
|
+
df = df.sort(
|
|
39
39
|
[
|
|
40
40
|
column_names.start_date,
|
|
41
41
|
column_names.match_id,
|
|
@@ -46,18 +46,31 @@ df = df.sort_values(
|
|
|
46
46
|
|
|
47
47
|
# Filter to valid games
|
|
48
48
|
df = (
|
|
49
|
-
df.
|
|
50
|
-
|
|
51
|
-
|
|
49
|
+
df.with_columns(
|
|
50
|
+
pl.col(column_names.team_id)
|
|
51
|
+
.n_unique()
|
|
52
|
+
.over(column_names.match_id)
|
|
53
|
+
.alias("team_count")
|
|
54
|
+
)
|
|
55
|
+
.filter(pl.col("team_count") == 2)
|
|
56
|
+
.drop("team_count")
|
|
52
57
|
)
|
|
53
58
|
|
|
54
59
|
# Train/test split (using temporal ordering)
|
|
55
|
-
most_recent_10_games =
|
|
56
|
-
|
|
57
|
-
|
|
60
|
+
most_recent_10_games = (
|
|
61
|
+
df.select(pl.col(column_names.match_id))
|
|
62
|
+
.unique(maintain_order=True)
|
|
63
|
+
.tail(10)
|
|
64
|
+
.get_column(column_names.match_id)
|
|
65
|
+
.to_list()
|
|
66
|
+
)
|
|
67
|
+
train_df = df.filter(~pl.col(column_names.match_id).is_in(most_recent_10_games))
|
|
68
|
+
test_df = df.filter(pl.col(column_names.match_id).is_in(most_recent_10_games))
|
|
58
69
|
|
|
59
|
-
|
|
60
|
-
|
|
70
|
+
train_games = train_df.select(pl.col(column_names.match_id).n_unique()).to_series().item()
|
|
71
|
+
test_games = test_df.select(pl.col(column_names.match_id).n_unique()).to_series().item()
|
|
72
|
+
print(f"Training: {len(train_df)} rows, {train_games} games")
|
|
73
|
+
print(f"Testing: {len(test_df)} rows, {test_games} games")
|
|
61
74
|
print()
|
|
62
75
|
|
|
63
76
|
# ====================================================================
|
|
@@ -86,8 +99,8 @@ features_pipeline = FeatureGeneratorPipeline(
|
|
|
86
99
|
)
|
|
87
100
|
|
|
88
101
|
# Generate features
|
|
89
|
-
train_df = features_pipeline.fit_transform(train_df)
|
|
90
|
-
test_df = features_pipeline.future_transform(test_df)
|
|
102
|
+
train_df = features_pipeline.fit_transform(train_df).to_pandas()
|
|
103
|
+
test_df = features_pipeline.future_transform(test_df).to_pandas()
|
|
91
104
|
|
|
92
105
|
print(f"Generated {len(features_pipeline.features_out)} baseline features")
|
|
93
106
|
print()
|
|
@@ -121,7 +134,7 @@ player_points_pipeline = AutoPipeline(
|
|
|
121
134
|
estimator=LGBMRegressor(verbose=-100, n_estimators=50),
|
|
122
135
|
# Features for the final estimator (only pre-game information)
|
|
123
136
|
# Note: points_estimate_raw will be added by the transformer
|
|
124
|
-
|
|
137
|
+
estimator_features=features_pipeline.features_out,
|
|
125
138
|
# The predictor_transformers parameter chains the estimators
|
|
126
139
|
predictor_transformers=[points_estimate_transformer], # Stage 1 executes first
|
|
127
140
|
)
|
|
@@ -150,7 +163,7 @@ print()
|
|
|
150
163
|
|
|
151
164
|
# Fit the pipeline
|
|
152
165
|
# The y target here is for the FINAL estimator (player points)
|
|
153
|
-
#
|
|
166
|
+
# Predictor_transformers are trained on the same target during fit()
|
|
154
167
|
player_points_pipeline.fit(X=train_df, y=train_df["points"])
|
|
155
168
|
|
|
156
169
|
print("Training complete!")
|
|
@@ -188,7 +201,7 @@ print()
|
|
|
188
201
|
|
|
189
202
|
single_stage_pipeline = AutoPipeline(
|
|
190
203
|
estimator=LGBMRegressor(verbose=-100, n_estimators=50),
|
|
191
|
-
|
|
204
|
+
estimator_features=features_pipeline.features_out,
|
|
192
205
|
)
|
|
193
206
|
|
|
194
207
|
print("Training single-stage baseline for comparison...")
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.
|
|
7
|
+
version = "0.8.7"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -127,7 +127,7 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
127
127
|
"""
|
|
128
128
|
Default search space for PlayerRatingGenerator.
|
|
129
129
|
|
|
130
|
-
Focuses on
|
|
130
|
+
Focuses on core parameters that have the most impact on performance.
|
|
131
131
|
|
|
132
132
|
Returns:
|
|
133
133
|
Dictionary mapping parameter names to ParamSpec objects
|
|
@@ -167,6 +167,31 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
167
167
|
param_type="categorical",
|
|
168
168
|
choices=["difference", "mean", "ignore_opponent"],
|
|
169
169
|
),
|
|
170
|
+
"start_league_quantile": ParamSpec(
|
|
171
|
+
param_type="float",
|
|
172
|
+
low=0.05,
|
|
173
|
+
high=0.5,
|
|
174
|
+
),
|
|
175
|
+
"start_min_count_for_percentiles": ParamSpec(
|
|
176
|
+
param_type="int",
|
|
177
|
+
low=40,
|
|
178
|
+
high=500,
|
|
179
|
+
),
|
|
180
|
+
"start_team_rating_subtract": ParamSpec(
|
|
181
|
+
param_type="float",
|
|
182
|
+
low=0.0,
|
|
183
|
+
high=200.0,
|
|
184
|
+
),
|
|
185
|
+
"start_team_weight": ParamSpec(
|
|
186
|
+
param_type="float",
|
|
187
|
+
low=0.0,
|
|
188
|
+
high=1.0,
|
|
189
|
+
),
|
|
190
|
+
"start_min_match_count_team_rating": ParamSpec(
|
|
191
|
+
param_type="int",
|
|
192
|
+
low=1,
|
|
193
|
+
high=10,
|
|
194
|
+
),
|
|
170
195
|
}
|
|
171
196
|
|
|
172
197
|
|
|
@@ -6,3 +6,7 @@ from .enums import (
|
|
|
6
6
|
RatingUnknownFeatures as RatingUnknownFeatures,
|
|
7
7
|
)
|
|
8
8
|
from .league_identifier import LeagueIdentifier as LeagueIdentifier
|
|
9
|
+
from .league_start_rating_optimizer import (
|
|
10
|
+
LeagueStartRatingOptimizationResult as LeagueStartRatingOptimizationResult,
|
|
11
|
+
LeagueStartRatingOptimizer as LeagueStartRatingOptimizer,
|
|
12
|
+
)
|