spforge 0.8.17__tar.gz → 0.8.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.17/spforge.egg-info → spforge-0.8.19}/PKG-INFO +1 -1
- {spforge-0.8.17 → spforge-0.8.19}/pyproject.toml +1 -1
- {spforge-0.8.17 → spforge-0.8.19}/spforge/autopipeline.py +11 -1
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_group_by_estimator.py +11 -3
- {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/__init__.py +2 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/_default_search_spaces.py +38 -23
- {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/_tuner.py +55 -2
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.17 → spforge-0.8.19/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -4
- {spforge-0.8.17 → spforge-0.8.19}/tests/hyperparameter_tuning/test_rating_tuner.py +157 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/test_autopipeline.py +143 -7
- {spforge-0.8.17 → spforge-0.8.19}/LICENSE +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/MANIFEST.in +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/README.md +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/game_level_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/lol/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/setup.cfg +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/data_structures.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_player_rating.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/scorer/_score.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/SOURCES.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_player_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_player_rating_no_mutation.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_utils_scaled_weights.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.19
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.
|
|
7
|
+
version = "0.8.19"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -236,6 +236,7 @@ class AutoPipeline(BaseEstimator):
|
|
|
236
236
|
estimator_features: list[str],
|
|
237
237
|
predictor_transformers: list[PredictorTransformer] | None = None,
|
|
238
238
|
granularity: list[str] | None = None,
|
|
239
|
+
aggregation_weight: str | None = None,
|
|
239
240
|
filters: list[Filter] | None = None,
|
|
240
241
|
scale_features: bool = False,
|
|
241
242
|
categorical_handling: CategoricalHandling = "auto",
|
|
@@ -250,6 +251,7 @@ class AutoPipeline(BaseEstimator):
|
|
|
250
251
|
self.estimator_features = estimator_features
|
|
251
252
|
self.feature_names = estimator_features # Internal compat
|
|
252
253
|
self.granularity = granularity or []
|
|
254
|
+
self.aggregation_weight = aggregation_weight
|
|
253
255
|
self.predictor_transformers = predictor_transformers
|
|
254
256
|
self.estimator = estimator
|
|
255
257
|
self.filters = filters or []
|
|
@@ -326,6 +328,10 @@ class AutoPipeline(BaseEstimator):
|
|
|
326
328
|
# Add granularity columns
|
|
327
329
|
context.extend(self.granularity)
|
|
328
330
|
|
|
331
|
+
# Add aggregation weight column
|
|
332
|
+
if self.aggregation_weight:
|
|
333
|
+
context.append(self.aggregation_weight)
|
|
334
|
+
|
|
329
335
|
# Add filter columns
|
|
330
336
|
self._filter_feature_names = []
|
|
331
337
|
for f in self.filters:
|
|
@@ -492,7 +498,11 @@ class AutoPipeline(BaseEstimator):
|
|
|
492
498
|
pre = PreprocessorToDataFrame(pre_raw)
|
|
493
499
|
|
|
494
500
|
est = (
|
|
495
|
-
GroupByEstimator(
|
|
501
|
+
GroupByEstimator(
|
|
502
|
+
self.estimator,
|
|
503
|
+
granularity=[f"{c}" for c in self.granularity],
|
|
504
|
+
aggregation_weight=self.aggregation_weight,
|
|
505
|
+
)
|
|
496
506
|
if do_groupby
|
|
497
507
|
else self.estimator
|
|
498
508
|
)
|
|
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class GroupByEstimator(BaseEstimator):
|
|
13
|
-
def __init__(
|
|
13
|
+
def __init__(
|
|
14
|
+
self,
|
|
15
|
+
estimator: Any,
|
|
16
|
+
granularity: list[str] | None = None,
|
|
17
|
+
aggregation_weight: str | None = None,
|
|
18
|
+
):
|
|
14
19
|
self.estimator = estimator
|
|
15
20
|
self.granularity = granularity or []
|
|
16
|
-
self.
|
|
21
|
+
self.aggregation_weight = aggregation_weight
|
|
22
|
+
self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
|
|
17
23
|
self._est = None
|
|
18
24
|
|
|
19
25
|
def __sklearn_is_fitted__(self):
|
|
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
|
|
|
22
28
|
@nw.narwhalify
|
|
23
29
|
def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
|
|
24
30
|
X = X.to_pandas()
|
|
25
|
-
|
|
31
|
+
# Backwards compatibility: old pickled objects may not have aggregation_weight
|
|
32
|
+
agg_weight = getattr(self, "aggregation_weight", None)
|
|
33
|
+
self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
|
|
26
34
|
X_red = nw.from_native(self._reducer.fit_transform(X))
|
|
27
35
|
y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)
|
|
28
36
|
|
|
@@ -7,6 +7,7 @@ from spforge.hyperparameter_tuning._default_search_spaces import (
|
|
|
7
7
|
get_default_search_space,
|
|
8
8
|
get_default_student_t_search_space,
|
|
9
9
|
get_default_team_rating_search_space,
|
|
10
|
+
get_full_player_rating_search_space,
|
|
10
11
|
)
|
|
11
12
|
from spforge.hyperparameter_tuning._tuner import (
|
|
12
13
|
EstimatorHyperparameterTuner,
|
|
@@ -28,4 +29,5 @@ __all__ = [
|
|
|
28
29
|
"get_default_team_rating_search_space",
|
|
29
30
|
"get_default_student_t_search_space",
|
|
30
31
|
"get_default_search_space",
|
|
32
|
+
"get_full_player_rating_search_space",
|
|
31
33
|
]
|
|
@@ -128,6 +128,7 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
128
128
|
Default search space for PlayerRatingGenerator.
|
|
129
129
|
|
|
130
130
|
Focuses on core parameters that have the most impact on performance.
|
|
131
|
+
Excludes performance_predictor and team-based start rating params.
|
|
131
132
|
|
|
132
133
|
Returns:
|
|
133
134
|
Dictionary mapping parameter names to ParamSpec objects
|
|
@@ -163,10 +164,6 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
163
164
|
"use_off_def_split": ParamSpec(
|
|
164
165
|
param_type="bool",
|
|
165
166
|
),
|
|
166
|
-
"performance_predictor": ParamSpec(
|
|
167
|
-
param_type="categorical",
|
|
168
|
-
choices=["difference", "mean", "ignore_opponent"],
|
|
169
|
-
),
|
|
170
167
|
"start_league_quantile": ParamSpec(
|
|
171
168
|
param_type="float",
|
|
172
169
|
low=0.05,
|
|
@@ -177,24 +174,46 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
177
174
|
low=40,
|
|
178
175
|
high=500,
|
|
179
176
|
),
|
|
180
|
-
"start_team_rating_subtract": ParamSpec(
|
|
181
|
-
param_type="float",
|
|
182
|
-
low=0.0,
|
|
183
|
-
high=200.0,
|
|
184
|
-
),
|
|
185
|
-
"start_team_weight": ParamSpec(
|
|
186
|
-
param_type="float",
|
|
187
|
-
low=0.0,
|
|
188
|
-
high=1.0,
|
|
189
|
-
),
|
|
190
|
-
"start_min_match_count_team_rating": ParamSpec(
|
|
191
|
-
param_type="int",
|
|
192
|
-
low=1,
|
|
193
|
-
high=10,
|
|
194
|
-
),
|
|
195
177
|
}
|
|
196
178
|
|
|
197
179
|
|
|
180
|
+
def get_full_player_rating_search_space() -> dict[str, ParamSpec]:
|
|
181
|
+
"""
|
|
182
|
+
Full search space for PlayerRatingGenerator including all tunable parameters.
|
|
183
|
+
|
|
184
|
+
Includes performance_predictor and team-based start rating parameters.
|
|
185
|
+
Use this when you want to tune all parameters.
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
Dictionary mapping parameter names to ParamSpec objects
|
|
189
|
+
"""
|
|
190
|
+
base = get_default_player_rating_search_space()
|
|
191
|
+
base.update(
|
|
192
|
+
{
|
|
193
|
+
"performance_predictor": ParamSpec(
|
|
194
|
+
param_type="categorical",
|
|
195
|
+
choices=["difference", "mean", "ignore_opponent"],
|
|
196
|
+
),
|
|
197
|
+
"start_team_rating_subtract": ParamSpec(
|
|
198
|
+
param_type="float",
|
|
199
|
+
low=0.0,
|
|
200
|
+
high=200.0,
|
|
201
|
+
),
|
|
202
|
+
"start_team_weight": ParamSpec(
|
|
203
|
+
param_type="float",
|
|
204
|
+
low=0.0,
|
|
205
|
+
high=1.0,
|
|
206
|
+
),
|
|
207
|
+
"start_min_match_count_team_rating": ParamSpec(
|
|
208
|
+
param_type="int",
|
|
209
|
+
low=1,
|
|
210
|
+
high=10,
|
|
211
|
+
),
|
|
212
|
+
}
|
|
213
|
+
)
|
|
214
|
+
return base
|
|
215
|
+
|
|
216
|
+
|
|
198
217
|
def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
|
|
199
218
|
"""
|
|
200
219
|
Default search space for TeamRatingGenerator.
|
|
@@ -235,10 +254,6 @@ def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
|
|
|
235
254
|
"use_off_def_split": ParamSpec(
|
|
236
255
|
param_type="bool",
|
|
237
256
|
),
|
|
238
|
-
"performance_predictor": ParamSpec(
|
|
239
|
-
param_type="categorical",
|
|
240
|
-
choices=["difference", "mean", "ignore_opponent"],
|
|
241
|
-
),
|
|
242
257
|
}
|
|
243
258
|
|
|
244
259
|
|
|
@@ -91,6 +91,9 @@ class RatingHyperparameterTuner:
|
|
|
91
91
|
scorer: BaseScorer,
|
|
92
92
|
direction: Literal["minimize", "maximize"],
|
|
93
93
|
param_search_space: dict[str, ParamSpec] | None = None,
|
|
94
|
+
param_ranges: dict[str, tuple[float | int, float | int]] | None = None,
|
|
95
|
+
exclude_params: list[str] | None = None,
|
|
96
|
+
fixed_params: dict[str, Any] | None = None,
|
|
94
97
|
n_trials: int = 50,
|
|
95
98
|
n_jobs: int = 1,
|
|
96
99
|
storage: str | None = None,
|
|
@@ -109,6 +112,14 @@ class RatingHyperparameterTuner:
|
|
|
109
112
|
scorer: Scorer for evaluation (must have score(df) -> float | dict)
|
|
110
113
|
direction: "minimize" or "maximize"
|
|
111
114
|
param_search_space: Custom search space (merges with defaults if provided)
|
|
115
|
+
param_ranges: Easy range override for float/int params. Maps param name to
|
|
116
|
+
(low, high) tuple. Preserves param_type and log scale from defaults.
|
|
117
|
+
Example: {"confidence_weight": (0.2, 1.0)}
|
|
118
|
+
exclude_params: List of param names to exclude from tuning entirely.
|
|
119
|
+
Example: ["performance_predictor", "use_off_def_split"]
|
|
120
|
+
fixed_params: Parameters to fix at specific values (not tuned).
|
|
121
|
+
These values are applied to the rating generator each trial.
|
|
122
|
+
Example: {"performance_predictor": "mean"}
|
|
112
123
|
n_trials: Number of optimization trials
|
|
113
124
|
n_jobs: Number of parallel jobs (1 = sequential)
|
|
114
125
|
storage: Optuna storage URL (e.g., "sqlite:///optuna.db") for persistence
|
|
@@ -123,6 +134,9 @@ class RatingHyperparameterTuner:
|
|
|
123
134
|
self.scorer = scorer
|
|
124
135
|
self.direction = direction
|
|
125
136
|
self.custom_search_space = param_search_space
|
|
137
|
+
self.param_ranges = param_ranges
|
|
138
|
+
self.exclude_params = exclude_params or []
|
|
139
|
+
self.fixed_params = fixed_params or {}
|
|
126
140
|
self.n_trials = n_trials
|
|
127
141
|
self.n_jobs = n_jobs
|
|
128
142
|
self.storage = storage
|
|
@@ -196,6 +210,9 @@ class RatingHyperparameterTuner:
|
|
|
196
210
|
try:
|
|
197
211
|
copied_gen = copy.deepcopy(self.rating_generator)
|
|
198
212
|
|
|
213
|
+
for param_name, param_value in self.fixed_params.items():
|
|
214
|
+
setattr(copied_gen, param_name, param_value)
|
|
215
|
+
|
|
199
216
|
trial_params = self._suggest_params(trial, search_space)
|
|
200
217
|
|
|
201
218
|
for param_name, param_value in trial_params.items():
|
|
@@ -243,18 +260,54 @@ class RatingHyperparameterTuner:
|
|
|
243
260
|
defaults: dict[str, ParamSpec],
|
|
244
261
|
) -> dict[str, ParamSpec]:
|
|
245
262
|
"""
|
|
246
|
-
Merge custom search space with defaults
|
|
263
|
+
Merge custom search space with defaults.
|
|
264
|
+
|
|
265
|
+
Priority order (highest to lowest):
|
|
266
|
+
1. exclude_params - removes param entirely
|
|
267
|
+
2. fixed_params - removes from search (applied separately)
|
|
268
|
+
3. custom (param_search_space) - full ParamSpec override
|
|
269
|
+
4. param_ranges - updates only low/high bounds
|
|
270
|
+
5. defaults - base search space
|
|
247
271
|
|
|
248
272
|
Args:
|
|
249
273
|
custom: Custom search space (may be None)
|
|
250
274
|
defaults: Default search space
|
|
251
275
|
|
|
252
276
|
Returns:
|
|
253
|
-
Merged search space
|
|
277
|
+
Merged search space (excludes fixed_params, those are applied separately)
|
|
254
278
|
"""
|
|
255
279
|
merged = defaults.copy()
|
|
280
|
+
|
|
281
|
+
if self.param_ranges:
|
|
282
|
+
for param_name, (low, high) in self.param_ranges.items():
|
|
283
|
+
if param_name not in merged:
|
|
284
|
+
raise ValueError(
|
|
285
|
+
f"param_ranges contains unknown parameter: '{param_name}'. "
|
|
286
|
+
f"Available parameters: {list(merged.keys())}"
|
|
287
|
+
)
|
|
288
|
+
existing = merged[param_name]
|
|
289
|
+
if existing.param_type not in ("float", "int"):
|
|
290
|
+
raise ValueError(
|
|
291
|
+
f"param_ranges can only override float/int parameters. "
|
|
292
|
+
f"'{param_name}' is {existing.param_type}."
|
|
293
|
+
)
|
|
294
|
+
merged[param_name] = ParamSpec(
|
|
295
|
+
param_type=existing.param_type,
|
|
296
|
+
low=low,
|
|
297
|
+
high=high,
|
|
298
|
+
log=existing.log,
|
|
299
|
+
step=existing.step,
|
|
300
|
+
)
|
|
301
|
+
|
|
256
302
|
if custom:
|
|
257
303
|
merged.update(custom)
|
|
304
|
+
|
|
305
|
+
for param_name in self.exclude_params:
|
|
306
|
+
merged.pop(param_name, None)
|
|
307
|
+
|
|
308
|
+
for param_name in self.fixed_params:
|
|
309
|
+
merged.pop(param_name, None)
|
|
310
|
+
|
|
258
311
|
return merged
|
|
259
312
|
|
|
260
313
|
@staticmethod
|
|
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
11
|
-
def __init__(self, granularity: list[str]):
|
|
11
|
+
def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
|
|
12
12
|
self.granularity = granularity
|
|
13
|
+
self.aggregation_weight = aggregation_weight
|
|
13
14
|
|
|
14
15
|
@nw.narwhalify
|
|
15
16
|
def fit(self, X: IntoFrameT, y: Any = None):
|
|
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
26
27
|
raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
|
|
27
28
|
|
|
28
29
|
non_keys = [c for c in df.columns if c not in keys]
|
|
29
|
-
|
|
30
|
+
schema = df.schema
|
|
31
|
+
num_cols = [c for c in non_keys if schema[c].is_numeric()]
|
|
30
32
|
other_cols = [c for c in non_keys if c not in num_cols]
|
|
31
33
|
|
|
32
34
|
aggs: list[nw.Expr] = []
|
|
33
35
|
|
|
36
|
+
# Backwards compatibility: old pickled objects may not have aggregation_weight
|
|
37
|
+
weight_col = getattr(self, "aggregation_weight", None)
|
|
38
|
+
has_weight = weight_col and weight_col in df.columns
|
|
39
|
+
|
|
34
40
|
for c in num_cols:
|
|
35
|
-
|
|
41
|
+
if c == weight_col:
|
|
42
|
+
aggs.append(nw.col(c).sum().alias(c))
|
|
43
|
+
elif has_weight:
|
|
44
|
+
aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
|
|
45
|
+
aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
|
|
46
|
+
else:
|
|
47
|
+
aggs.append(nw.col(c).mean().alias(c))
|
|
36
48
|
|
|
37
49
|
for c in other_cols:
|
|
38
50
|
aggs.append(nw.col(c).first().alias(c))
|
|
39
51
|
|
|
52
|
+
if has_weight:
|
|
53
|
+
aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
|
|
54
|
+
|
|
40
55
|
out = df.group_by(keys).agg(aggs)
|
|
56
|
+
|
|
57
|
+
if has_weight:
|
|
58
|
+
weighted_cols = [c for c in num_cols if c != weight_col]
|
|
59
|
+
for c in weighted_cols:
|
|
60
|
+
out = out.with_columns(
|
|
61
|
+
nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
|
|
62
|
+
.then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
|
|
63
|
+
.otherwise(nw.col(f"__{c}_fallback"))
|
|
64
|
+
.alias(c)
|
|
65
|
+
)
|
|
66
|
+
drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
|
|
67
|
+
drop_cols += [f"__{c}_fallback" for c in weighted_cols]
|
|
68
|
+
drop_cols.append("__weight_sum")
|
|
69
|
+
out = out.drop(drop_cols)
|
|
70
|
+
|
|
41
71
|
return out
|
|
42
72
|
|
|
43
73
|
@nw.narwhalify
|
|
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
59
89
|
if sample_weight is not None:
|
|
60
90
|
df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
|
|
61
91
|
|
|
62
|
-
|
|
92
|
+
y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
|
|
93
|
+
non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
|
|
94
|
+
if len(non_uniform) > 0:
|
|
95
|
+
raise ValueError("Target (y) must be uniform within each granularity group")
|
|
63
96
|
|
|
64
|
-
|
|
65
|
-
agg_exprs = [nw.col("__y").mean().alias("__y")]
|
|
66
|
-
else:
|
|
67
|
-
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
97
|
+
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
68
98
|
|
|
69
99
|
if sample_weight is not None:
|
|
70
100
|
agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.19
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
{spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py
RENAMED
|
@@ -96,12 +96,8 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
|
|
|
96
96
|
"confidence_value_denom",
|
|
97
97
|
"confidence_max_sum",
|
|
98
98
|
"use_off_def_split",
|
|
99
|
-
"performance_predictor",
|
|
100
|
-
"start_team_weight",
|
|
101
99
|
"start_league_quantile",
|
|
102
100
|
"start_min_count_for_percentiles",
|
|
103
|
-
"start_min_match_count_team_rating",
|
|
104
|
-
"start_team_rating_subtract",
|
|
105
101
|
}
|
|
106
102
|
assert set(result.best_params.keys()) == expected_params
|
|
107
103
|
|
|
@@ -454,3 +454,160 @@ def test_param_spec__categorical_requires_choices():
|
|
|
454
454
|
|
|
455
455
|
with pytest.raises(ValueError, match="requires choices"):
|
|
456
456
|
spec.suggest(trial, "test_param")
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def test_param_ranges__overrides_bounds(
|
|
460
|
+
player_rating_generator, cross_validator, scorer, sample_player_df_pd
|
|
461
|
+
):
|
|
462
|
+
"""Test that param_ranges overrides low/high bounds while preserving param_type."""
|
|
463
|
+
tuner = RatingHyperparameterTuner(
|
|
464
|
+
rating_generator=player_rating_generator,
|
|
465
|
+
cross_validator=cross_validator,
|
|
466
|
+
scorer=scorer,
|
|
467
|
+
direction="minimize",
|
|
468
|
+
param_ranges={
|
|
469
|
+
"confidence_weight": (0.2, 0.3),
|
|
470
|
+
},
|
|
471
|
+
n_trials=3,
|
|
472
|
+
show_progress_bar=False,
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
result = tuner.optimize(sample_player_df_pd)
|
|
476
|
+
|
|
477
|
+
assert "confidence_weight" in result.best_params
|
|
478
|
+
assert 0.2 <= result.best_params["confidence_weight"] <= 0.3
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def test_exclude_params__removes_from_search(
|
|
482
|
+
player_rating_generator, cross_validator, scorer, sample_player_df_pd
|
|
483
|
+
):
|
|
484
|
+
"""Test that exclude_params removes parameters from search space."""
|
|
485
|
+
tuner = RatingHyperparameterTuner(
|
|
486
|
+
rating_generator=player_rating_generator,
|
|
487
|
+
cross_validator=cross_validator,
|
|
488
|
+
scorer=scorer,
|
|
489
|
+
direction="minimize",
|
|
490
|
+
exclude_params=["use_off_def_split", "confidence_weight"],
|
|
491
|
+
n_trials=3,
|
|
492
|
+
show_progress_bar=False,
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
result = tuner.optimize(sample_player_df_pd)
|
|
496
|
+
|
|
497
|
+
assert "use_off_def_split" not in result.best_params
|
|
498
|
+
assert "confidence_weight" not in result.best_params
|
|
499
|
+
assert "rating_change_multiplier_offense" in result.best_params
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def test_fixed_params__applies_values_without_tuning(
|
|
503
|
+
player_rating_generator, cross_validator, scorer, sample_player_df_pd
|
|
504
|
+
):
|
|
505
|
+
"""Test that fixed_params sets values without including in search space."""
|
|
506
|
+
tuner = RatingHyperparameterTuner(
|
|
507
|
+
rating_generator=player_rating_generator,
|
|
508
|
+
cross_validator=cross_validator,
|
|
509
|
+
scorer=scorer,
|
|
510
|
+
direction="minimize",
|
|
511
|
+
fixed_params={"use_off_def_split": False},
|
|
512
|
+
n_trials=3,
|
|
513
|
+
show_progress_bar=False,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
result = tuner.optimize(sample_player_df_pd)
|
|
517
|
+
|
|
518
|
+
assert "use_off_def_split" not in result.best_params
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def test_param_ranges__unknown_param_raises_error(
|
|
522
|
+
player_rating_generator, cross_validator, scorer, sample_player_df_pd
|
|
523
|
+
):
|
|
524
|
+
"""Test that param_ranges with unknown param raises ValueError."""
|
|
525
|
+
tuner = RatingHyperparameterTuner(
|
|
526
|
+
rating_generator=player_rating_generator,
|
|
527
|
+
cross_validator=cross_validator,
|
|
528
|
+
scorer=scorer,
|
|
529
|
+
direction="minimize",
|
|
530
|
+
param_ranges={"nonexistent_param": (0.0, 1.0)},
|
|
531
|
+
n_trials=3,
|
|
532
|
+
show_progress_bar=False,
|
|
533
|
+
)
|
|
534
|
+
|
|
535
|
+
with pytest.raises(ValueError, match="unknown parameter"):
|
|
536
|
+
tuner.optimize(sample_player_df_pd)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
def test_param_ranges__non_numeric_param_raises_error(
|
|
540
|
+
player_rating_generator, cross_validator, scorer, sample_player_df_pd
|
|
541
|
+
):
|
|
542
|
+
"""Test that param_ranges on non-float/int param raises ValueError."""
|
|
543
|
+
tuner = RatingHyperparameterTuner(
|
|
544
|
+
rating_generator=player_rating_generator,
|
|
545
|
+
cross_validator=cross_validator,
|
|
546
|
+
scorer=scorer,
|
|
547
|
+
direction="minimize",
|
|
548
|
+
param_ranges={"use_off_def_split": (0, 1)},
|
|
549
|
+
n_trials=3,
|
|
550
|
+
show_progress_bar=False,
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
with pytest.raises(ValueError, match="can only override float/int"):
|
|
554
|
+
tuner.optimize(sample_player_df_pd)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def test_combined_api__param_ranges_exclude_fixed(
    player_rating_generator, cross_validator, scorer, sample_player_df_pd
):
    """param_ranges, exclude_params and fixed_params can be combined in one tuner."""
    overridden_ranges = {
        "confidence_weight": (0.2, 1.0),
        "rating_change_multiplier_offense": (10.0, 150.0),
    }
    tuner = RatingHyperparameterTuner(
        rating_generator=player_rating_generator,
        cross_validator=cross_validator,
        scorer=scorer,
        direction="minimize",
        param_ranges=overridden_ranges,
        exclude_params=["start_league_quantile"],
        fixed_params={"use_off_def_split": False},
        n_trials=3,
        show_progress_bar=False,
    )

    best = tuner.optimize(sample_player_df_pd).best_params

    # Overridden ranges are respected for the tuned parameters...
    assert 0.2 <= best["confidence_weight"] <= 1.0
    assert 10.0 <= best["rating_change_multiplier_offense"] <= 150.0
    # ...while excluded and fixed parameters never enter the search space.
    assert "start_league_quantile" not in best
    assert "use_off_def_split" not in best
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def test_default_search_space__excludes_performance_predictor_and_team_start(
    player_rating_generator,
):
    """The default search space omits performance_predictor and team-start params."""
    from spforge.hyperparameter_tuning._default_search_spaces import (
        get_default_search_space,
    )

    space = get_default_search_space(player_rating_generator)

    for excluded_name in (
        "performance_predictor",
        "start_team_rating_subtract",
        "start_team_weight",
        "start_min_match_count_team_rating",
    ):
        assert excluded_name not in space
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def test_full_player_rating_search_space__includes_all_params():
    """The full search space exposes every tunable, including team-start params."""
    from spforge.hyperparameter_tuning._default_search_spaces import (
        get_full_player_rating_search_space,
    )

    space = get_full_player_rating_search_space()

    for expected_name in (
        "performance_predictor",
        "start_team_rating_subtract",
        "start_team_weight",
        "start_min_match_count_team_rating",
        "rating_change_multiplier_offense",
        "confidence_weight",
    ):
        assert expected_name in space
|
|
@@ -328,7 +328,18 @@ def test_infer_categorical_from_feature_names_when_only_numeric_features_given(d
|
|
|
328
328
|
assert any(c.startswith("cat") for c in cap.fit_columns)
|
|
329
329
|
|
|
330
330
|
|
|
331
|
-
def test_granularity_groups_rows_before_estimator_fit_and_predict(
|
|
331
|
+
def test_granularity_groups_rows_before_estimator_fit_and_predict(frame):
|
|
332
|
+
df_pd = pd.DataFrame(
|
|
333
|
+
{
|
|
334
|
+
"gameid": ["g1", "g1", "g2", "g2", "g3", "g3"],
|
|
335
|
+
"num1": [1.0, 2.0, np.nan, 4.0, 5.0, 6.0],
|
|
336
|
+
"num2": [10.0, 20.0, 30.0, 40.0, np.nan, 60.0],
|
|
337
|
+
"cat1": ["a", "b", "a", None, "b", "c"],
|
|
338
|
+
"y": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
|
|
339
|
+
}
|
|
340
|
+
)
|
|
341
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
342
|
+
|
|
332
343
|
model = AutoPipeline(
|
|
333
344
|
estimator=CaptureEstimator(),
|
|
334
345
|
estimator_features=["gameid", "num1", "num2", "cat1"],
|
|
@@ -339,16 +350,16 @@ def test_granularity_groups_rows_before_estimator_fit_and_predict(df_reg):
|
|
|
339
350
|
remainder="drop",
|
|
340
351
|
)
|
|
341
352
|
|
|
342
|
-
X = _select(
|
|
343
|
-
y = _col(
|
|
353
|
+
X = _select(df, ["gameid", "num1", "num2", "cat1"])
|
|
354
|
+
y = _col(df, "y")
|
|
344
355
|
model.fit(X, y=y)
|
|
345
356
|
|
|
346
357
|
inner = _inner_estimator(model)
|
|
347
358
|
|
|
348
|
-
if isinstance(
|
|
349
|
-
n_groups =
|
|
359
|
+
if isinstance(df, pl.DataFrame):
|
|
360
|
+
n_groups = df.select(pl.col("gameid").n_unique()).item()
|
|
350
361
|
else:
|
|
351
|
-
n_groups =
|
|
362
|
+
n_groups = df["gameid"].nunique()
|
|
352
363
|
|
|
353
364
|
assert inner.fit_shape[0] == n_groups
|
|
354
365
|
|
|
@@ -724,9 +735,10 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
|
|
|
724
735
|
"gameid": ["g1", "g1", "g2", "g2"],
|
|
725
736
|
"num1": [1.0, 2.0, 3.0, 4.0],
|
|
726
737
|
"num2": [10.0, 20.0, 30.0, 40.0],
|
|
738
|
+
"y": [1.0, 1.0, 2.0, 2.0],
|
|
727
739
|
}
|
|
728
740
|
)
|
|
729
|
-
y =
|
|
741
|
+
y = df["y"]
|
|
730
742
|
|
|
731
743
|
model = AutoPipeline(
|
|
732
744
|
estimator=RandomForestRegressor(n_estimators=5, random_state=42),
|
|
@@ -745,3 +757,127 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
|
|
|
745
757
|
assert list(names.keys()) == list(inner.feature_names_in_)
|
|
746
758
|
assert "gameid" not in names
|
|
747
759
|
assert "const_pred" in names
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_granularity_with_aggregation_weight__features_weighted(frame):
    """With granularity plus an aggregation weight, training rows are grouped
    per game while predictions still cover every original input row."""
    base = pd.DataFrame(
        {
            "gameid": ["g1", "g1", "g2", "g2"],
            "num1": [10.0, 30.0, 20.0, 40.0],
            "weight": [0.25, 0.75, 0.5, 0.5],
            "y": [1.0, 1.0, 2.0, 2.0],
        }
    )
    df = pl.from_pandas(base) if frame == "pl" else base

    model = AutoPipeline(
        estimator=CaptureEstimator(),
        estimator_features=["num1"],
        granularity=["gameid"],
        aggregation_weight="weight",
        remainder="drop",
    )

    X = _select(df, ["gameid", "num1", "weight"])
    y = _col(df, "y")
    model.fit(X, y=y)

    # Two distinct game ids -> two aggregated rows reach the inner estimator.
    assert _inner_estimator(model).fit_shape[0] == 2

    # Predictions are broadcast back to the original per-row granularity.
    assert model.predict(X).shape[0] == len(X)
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_granularity_aggregation_weight__weighted_mean_correct(frame):
    """GroupByReducer aggregates numeric columns as a weight-normalised mean."""
    from spforge.transformers._other_transformer import GroupByReducer

    base = pd.DataFrame(
        {
            "gameid": ["g1", "g1"],
            "num1": [10.0, 30.0],
            "weight": [0.25, 0.75],
            "y": [1.0, 1.0],
        }
    )
    df = pl.from_pandas(base) if frame == "pl" else base

    out = GroupByReducer(
        granularity=["gameid"], aggregation_weight="weight"
    ).fit_transform(df)

    actual = out["num1"].to_list()[0] if frame == "pl" else out["num1"].iloc[0]

    # Weighted mean: sum(value * weight) / sum(weight).
    expected = (10.0 * 0.25 + 30.0 * 0.75) / (0.25 + 0.75)
    assert abs(actual - expected) < 1e-6
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_reduce_y_raises_when_target_not_uniform_per_group(frame):
    """reduce_y must reject targets that differ within a granularity group."""
    from spforge.transformers._other_transformer import GroupByReducer

    base = pd.DataFrame(
        {
            "gameid": ["g1", "g1"],
            "num1": [10.0, 30.0],
        }
    )
    df = pl.from_pandas(base) if frame == "pl" else base

    reducer = GroupByReducer(granularity=["gameid"])

    # Same game id but two different target values -> ambiguous group target.
    with pytest.raises(ValueError, match="Target.*must be uniform"):
        reducer.reduce_y(df, np.array([1.0, 2.0]))
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_reduce_y_works_when_target_uniform_per_group(frame):
    """reduce_y collapses a per-row target to one value per granularity group."""
    from spforge.transformers._other_transformer import GroupByReducer

    base = pd.DataFrame(
        {
            "gameid": ["g1", "g1", "g2", "g2"],
            "num1": [10.0, 30.0, 20.0, 40.0],
        }
    )
    df = pl.from_pandas(base) if frame == "pl" else base

    reducer = GroupByReducer(granularity=["gameid"])
    reduced, _ = reducer.reduce_y(df, np.array([1.0, 1.0, 2.0, 2.0]))

    # One target per game, with the per-group values preserved.
    assert len(reduced) == 2
    assert set(reduced) == {1.0, 2.0}
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_aggregation_weight_sums_weight_column(frame):
    """The weight column itself is aggregated by summation, not averaged."""
    from spforge.transformers._other_transformer import GroupByReducer

    base = pd.DataFrame(
        {
            "gameid": ["g1", "g1"],
            "num1": [10.0, 30.0],
            "weight": [0.25, 0.75],
            "y": [1.0, 1.0],
        }
    )
    df = pl.from_pandas(base) if frame == "pl" else base

    out = GroupByReducer(
        granularity=["gameid"], aggregation_weight="weight"
    ).fit_transform(df)

    actual = out["weight"].to_list()[0] if frame == "pl" else out["weight"].iloc[0]
    assert abs(actual - (0.25 + 0.75)) < 1e-6
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_student_t_distribution_estimator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_regressor_feature_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performance_manager.py
RENAMED
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|