spforge 0.8.4-py3-none-any.whl → 0.8.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/autopipeline.py +169 -5
- spforge/estimator/_group_by_estimator.py +11 -3
- spforge/features_generator_pipeline.py +8 -4
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/performance_transformers/_performance_manager.py +2 -4
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +142 -28
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/ratings/utils.py +16 -6
- spforge/scorer/_score.py +42 -11
- spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/METADATA +12 -19
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/RECORD +37 -31
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/WHEEL +1 -1
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/performance_transformers/test_performance_manager.py +15 -0
- tests/ratings/test_player_rating_generator.py +154 -0
- tests/ratings/test_player_rating_no_mutation.py +214 -0
- tests/ratings/test_utils_scaled_weights.py +136 -0
- tests/scorer/test_score.py +232 -0
- tests/test_autopipeline.py +336 -6
- tests/test_feature_generator_pipeline.py +43 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.4.dist-info → spforge-0.8.18.dist-info}/top_level.txt +0 -0
spforge/autopipeline.py
CHANGED
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
     return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))


+def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
+    """Recursively find innermost estimator with feature_importances_ or coef_."""
+    if hasattr(estimator, "feature_importances_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "feature_importances_")
+
+    if hasattr(estimator, "coef_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "coef_")
+
+    return _get_importance_estimator_inner(estimator)
+
+
+def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
+    """Check wrapped estimators for importance attributes."""
+    # Check estimator_ (sklearn fitted wrapper convention)
+    if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
+        result = _get_importance_estimator(estimator.estimator_)
+        if result is not None:
+            return result
+
+    # Check _est (GroupByEstimator convention)
+    if hasattr(estimator, "_est") and estimator._est is not None:
+        result = _get_importance_estimator(estimator._est)
+        if result is not None:
+            return result
+
+    return None
+
+
 class AutoPipeline(BaseEstimator):
     def __init__(
         self,
@@ -202,6 +236,7 @@ class AutoPipeline(BaseEstimator):
         estimator_features: list[str],
         predictor_transformers: list[PredictorTransformer] | None = None,
         granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
         filters: list[Filter] | None = None,
         scale_features: bool = False,
         categorical_handling: CategoricalHandling = "auto",
@@ -216,6 +251,7 @@ class AutoPipeline(BaseEstimator):
         self.estimator_features = estimator_features
         self.feature_names = estimator_features  # Internal compat
         self.granularity = granularity or []
+        self.aggregation_weight = aggregation_weight
         self.predictor_transformers = predictor_transformers
         self.estimator = estimator
         self.filters = filters or []
@@ -230,6 +266,7 @@ class AutoPipeline(BaseEstimator):
         self.numeric_features = numeric_features
         self.remainder = remainder
         self._cat_feats = []
+        self._filter_feature_names: list[str] = []

         # Auto-compute context features
         self.context_feature_names = self._compute_context_features()
@@ -242,11 +279,12 @@ class AutoPipeline(BaseEstimator):
         self._resolved_categorical_handling: CategoricalHandling | None = None

     def _compute_context_features(self) -> list[str]:
-        """Auto-compute context features from estimator
+        """Auto-compute context features from estimator and granularity.

         Note: Context from predictor_transformers is tracked separately in
         context_predictor_transformer_feature_names and is dropped before
-        the final estimator.
+        the final estimator. Filter columns are tracked separately and are
+        dropped before the final estimator.
         """
         from spforge.transformers._base import PredictorTransformer

@@ -290,9 +328,15 @@ class AutoPipeline(BaseEstimator):
         # Add granularity columns
         context.extend(self.granularity)

+        # Add aggregation weight column
+        if self.aggregation_weight:
+            context.append(self.aggregation_weight)
+
         # Add filter columns
+        self._filter_feature_names = []
         for f in self.filters:
-
+            if f.column_name not in self._filter_feature_names:
+                self._filter_feature_names.append(f.column_name)

         # Dedupe while preserving order, excluding estimator_features
         seen = set()
@@ -454,7 +498,11 @@ class AutoPipeline(BaseEstimator):
         pre = PreprocessorToDataFrame(pre_raw)

         est = (
-            GroupByEstimator(
+            GroupByEstimator(
+                self.estimator,
+                granularity=[f"{c}" for c in self.granularity],
+                aggregation_weight=self.aggregation_weight,
+            )
             if do_groupby
             else self.estimator
         )
@@ -506,8 +554,10 @@ class AutoPipeline(BaseEstimator):
             prev_transformer_feats_out.extend(feats_out)

         # Use FunctionTransformer with global function for serializability
+        drop_filter_cols = set(self._filter_feature_names)
+        drop_cols = drop_ctx_set | drop_filter_cols
         final = FunctionTransformer(
-            _drop_columns_transformer, validate=False, kw_args={"drop_cols":
+            _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_cols}
         )
         steps.append(("final", final))

@@ -538,6 +588,7 @@ class AutoPipeline(BaseEstimator):
                 self.feature_names
                 + self.context_feature_names
                 + self.context_predictor_transformer_feature_names
+                + self._filter_feature_names
                 + self.granularity
             )
         )
@@ -626,4 +677,117 @@ class AutoPipeline(BaseEstimator):
             if ctx not in all_features:
                 all_features.append(ctx)

+        # Add filter columns (needed for fit-time filtering)
+        for col in self._filter_feature_names:
+            if col not in all_features:
+                all_features.append(col)
+
         return all_features
+
+    def _get_estimator_feature_names(self) -> list[str]:
+        """Get feature names as seen by the final estimator after all transformations."""
+        pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
+
+        # Remove context columns dropped by "final" step
+        final_step = self.sklearn_pipeline.named_steps["final"]
+        drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
+        features = [f for f in pre_out if f not in drop_cols]
+
+        # Remove granularity columns (dropped by GroupByEstimator)
+        granularity_set = set(self.granularity)
+        features = [f for f in features if f not in granularity_set]
+
+        # Remove context features (used by wrapper estimators, not inner model)
+        context_set = set(self.context_feature_names)
+        features = [f for f in features if f not in context_set]
+
+        # Remove filter columns (used only for fit-time filtering)
+        filter_set = set(self._filter_feature_names)
+        features = [f for f in features if f not in filter_set]
+
+        return features
+
+    def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
+        names = None
+        if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
+            names = list(estimator.feature_names_in_)
+        elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
+            names = list(estimator.feature_name_)
+        elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
+            names = list(estimator.feature_names_)
+        if names is None:
+            names = self._get_estimator_feature_names()
+        if len(names) != n_features:
+            raise ValueError(
+                f"Feature names length ({len(names)}) does not match importances length ({n_features})."
+            )
+        return names
+
+    @property
+    def feature_importances_(self) -> pd.DataFrame:
+        """Get feature importances from the fitted estimator.
+
+        Returns a DataFrame with columns ["feature", "importance"] sorted by
+        absolute importance descending. Works with tree-based models
+        (feature_importances_) and linear models (coef_).
+        """
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            # Linear models: use absolute value of coefficients
+            if raw.ndim == 2:
+                # Multi-class: average absolute values across classes
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        feature_names = self._get_estimator_feature_names()
+
+        df = pd.DataFrame({"feature": feature_names, "importance": importances})
+        df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
+        return df
+
+    @property
+    def feature_importance_names(self) -> dict[str, float]:
+        """Map deepest estimator feature names to importances."""
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            if raw.ndim == 2:
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        importances = np.asarray(importances)
+        feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
+        return dict(zip(feature_names, importances.tolist()))
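For orientation, a minimal usage sketch of the new aggregation_weight argument and the feature_importances_ / feature_importance_names properties added above. The constructor arguments mirror the diff; the column names, the training frame, and the fit() call shape are illustrative assumptions, not taken from spforge's examples.

# Sketch under assumptions: train_df and its columns are hypothetical.
import pandas as pd
from lightgbm import LGBMRegressor
from spforge.autopipeline import AutoPipeline

train_df = pd.DataFrame({
    "game_id": [1, 1, 2, 2],
    "team_id": ["A", "B", "A", "B"],
    "rating_diff": [50.0, -50.0, 30.0, -30.0],
    "rest_days": [2, 3, 1, 4],
    "minutes_played": [240, 240, 240, 240],
    "team_points": [102, 98, 95, 101],
})

pipeline = AutoPipeline(
    estimator=LGBMRegressor(),
    estimator_features=["rating_diff", "rest_days"],  # model inputs
    granularity=["game_id", "team_id"],               # reduce rows to one per key
    aggregation_weight="minutes_played",              # new in 0.8.18: weight column for the reduction
)
pipeline.fit(train_df, train_df["team_points"])       # assumed fit(X, y) shape

print(pipeline.feature_importances_)      # DataFrame["feature", "importance"], sorted by |importance|
print(pipeline.feature_importance_names)  # dict: deepest estimator's feature names -> importance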
spforge/estimator/_group_by_estimator.py
CHANGED
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer


 class GroupByEstimator(BaseEstimator):
-    def __init__(
+    def __init__(
+        self,
+        estimator: Any,
+        granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
+    ):
         self.estimator = estimator
         self.granularity = granularity or []
-        self.
+        self.aggregation_weight = aggregation_weight
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
         self._est = None

     def __sklearn_is_fitted__(self):
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
     @nw.narwhalify
     def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
         X = X.to_pandas()
-
+        # Backwards compatibility: old pickled objects may not have aggregation_weight
+        agg_weight = getattr(self, "aggregation_weight", None)
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
         X_red = nw.from_native(self._reducer.fit_transform(X))
         y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)

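Conceptually, GroupByEstimator now reduces rows sharing a granularity key before fitting the wrapped estimator, and the new aggregation_weight column plumbs through to GroupByReducer. The reducer's exact aggregation logic is not shown in this diff; the plain-pandas sketch below assumes a weighted mean per key, which is one plausible behavior.

# Conceptual sketch only (pandas, no spforge): a weighted group-by reduction.
import pandas as pd

df = pd.DataFrame({
    "game_id": [1, 1, 1, 1],
    "team_id": ["A", "A", "B", "B"],
    "rating": [1200.0, 1400.0, 1300.0, 1100.0],
    "minutes_played": [36, 12, 24, 24],
})

def weighted_mean(group: pd.DataFrame, value_col: str, weight_col: str) -> float:
    # Weight each row's value by its weight column, then normalize.
    w = group[weight_col]
    return (group[value_col] * w).sum() / w.sum()

reduced = df.groupby(["game_id", "team_id"]).apply(
    weighted_mean, value_col="rating", weight_col="minutes_played"
)
print(reduced)  # one weighted rating per (game_id, team_id)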
spforge/features_generator_pipeline.py
CHANGED
@@ -120,7 +120,8 @@ class FeatureGeneratorPipeline(FeatureGenerator):

         for transformer in self.feature_generators:
             pre_row_count = len(df)
-
+            native_df = df.to_native()
+            df = nw.from_native(transformer.fit_transform(native_df, column_names=column_names))
             assert len(df) == pre_row_count
             for f in transformer.features_out:
                 if f in expected_feats_added:
@@ -151,7 +152,8 @@ class FeatureGeneratorPipeline(FeatureGenerator):

         for transformer in self.feature_generators:
             pre_row_count = len(df)
-
+            native_df = df.to_native()
+            df = nw.from_native(transformer.transform(native_df))
             assert len(df) == pre_row_count
             for f in transformer.features_out:
                 if f in expected_feats_added:
@@ -181,9 +183,11 @@ class FeatureGeneratorPipeline(FeatureGenerator):
         for transformer in self.feature_generators:
             pre_row_count = len(df)
             if hasattr(transformer, "future_transform") and callable(transformer.future_transform):
-
+                native_df = df.to_native()
+                df = nw.from_native(transformer.future_transform(native_df))
             else:
-
+                native_df = df.to_native()
+                df = nw.from_native(transformer.transform(native_df))
             assert len(df) == pre_row_count
             for f in transformer.features_out:
                 if f in expected_feats_added:
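All three hunks apply the same pattern: unwrap the narwhals frame to its native backend before calling each feature generator, then re-wrap the result. A minimal round-trip demo, assuming polars as the backend (any narwhals-supported backend works):

# Minimal narwhals round-trip, mirroring the to_native()/from_native() pattern above.
import narwhals as nw
import polars as pl  # backend choice is an assumption for this demo

df = nw.from_native(pl.DataFrame({"x": [1, 2, 3]}))
native_df = df.to_native()                             # plain polars.DataFrame again
native_df = native_df.with_columns(y=pl.col("x") * 2)  # backend-specific transform
df = nw.from_native(native_df)                         # back into the narwhals wrapper
print(df.columns)  # ['x', 'y']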
spforge/hyperparameter_tuning/__init__.py
CHANGED
@@ -1,9 +1,15 @@
 from spforge.hyperparameter_tuning._default_search_spaces import (
+    get_default_estimator_search_space,
+    get_default_lgbm_search_space,
+    get_default_negative_binomial_search_space,
+    get_default_normal_distribution_search_space,
     get_default_player_rating_search_space,
     get_default_search_space,
+    get_default_student_t_search_space,
     get_default_team_rating_search_space,
 )
 from spforge.hyperparameter_tuning._tuner import (
+    EstimatorHyperparameterTuner,
     OptunaResult,
     ParamSpec,
     RatingHyperparameterTuner,
@@ -11,9 +17,15 @@ from spforge.hyperparameter_tuning._tuner import (

 __all__ = [
     "RatingHyperparameterTuner",
+    "EstimatorHyperparameterTuner",
     "ParamSpec",
     "OptunaResult",
+    "get_default_estimator_search_space",
+    "get_default_lgbm_search_space",
+    "get_default_negative_binomial_search_space",
+    "get_default_normal_distribution_search_space",
     "get_default_player_rating_search_space",
     "get_default_team_rating_search_space",
+    "get_default_student_t_search_space",
     "get_default_search_space",
 ]
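With these exports, the new default search spaces are importable from the subpackage directly. A small sketch of the lookup (EstimatorHyperparameterTuner's constructor is not shown in this diff, so only the search-space dispatch is demonstrated; the expected keys come from get_default_lgbm_search_space below):

from lightgbm import LGBMRegressor
from spforge.hyperparameter_tuning import get_default_estimator_search_space

space = get_default_estimator_search_space(LGBMRegressor())
print(sorted(space))
# ['max_depth', 'min_child_samples', 'n_estimators', 'num_leaves',
#  'reg_alpha', 'reg_lambda', 'subsample', 'subsample_freq']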
spforge/hyperparameter_tuning/_default_search_spaces.py
CHANGED
@@ -1,12 +1,133 @@
 from spforge.hyperparameter_tuning._tuner import ParamSpec
 from spforge.ratings import PlayerRatingGenerator, TeamRatingGenerator
+from spforge.distributions import (
+    NegativeBinomialEstimator,
+    NormalDistributionPredictor,
+    StudentTDistributionEstimator,
+)
+
+
+def _is_lightgbm_estimator(obj: object) -> bool:
+    mod = (getattr(type(obj), "__module__", "") or "").lower()
+    name = type(obj).__name__
+    if "lightgbm" in mod:
+        return True
+    return bool(name.startswith("LGBM"))
+
+
+def get_default_lgbm_search_space() -> dict[str, ParamSpec]:
+    return {
+        "n_estimators": ParamSpec(
+            param_type="int",
+            low=50,
+            high=800,
+            log=True,
+        ),
+        "num_leaves": ParamSpec(
+            param_type="int",
+            low=16,
+            high=256,
+            log=True,
+        ),
+        "max_depth": ParamSpec(
+            param_type="int",
+            low=3,
+            high=12,
+        ),
+        "min_child_samples": ParamSpec(
+            param_type="int",
+            low=10,
+            high=200,
+            log=True,
+        ),
+        "subsample": ParamSpec(
+            param_type="float",
+            low=0.6,
+            high=1.0,
+        ),
+        "subsample_freq": ParamSpec(
+            param_type="int",
+            low=1,
+            high=7,
+        ),
+        "reg_alpha": ParamSpec(
+            param_type="float",
+            low=1e-8,
+            high=10.0,
+            log=True,
+        ),
+        "reg_lambda": ParamSpec(
+            param_type="float",
+            low=1e-8,
+            high=10.0,
+            log=True,
+        ),
+    }
+
+
+def get_default_negative_binomial_search_space() -> dict[str, ParamSpec]:
+    return {
+        "predicted_r_weight": ParamSpec(
+            param_type="float",
+            low=0.0,
+            high=1.0,
+        ),
+        "r_rolling_mean_window": ParamSpec(
+            param_type="int",
+            low=10,
+            high=120,
+        ),
+        "predicted_r_iterations": ParamSpec(
+            param_type="int",
+            low=2,
+            high=12,
+        ),
+    }
+
+
+def get_default_normal_distribution_search_space() -> dict[str, ParamSpec]:
+    return {
+        "sigma": ParamSpec(
+            param_type="float",
+            low=0.5,
+            high=30.0,
+            log=True,
+        ),
+    }
+
+
+def get_default_student_t_search_space() -> dict[str, ParamSpec]:
+    return {
+        "df": ParamSpec(
+            param_type="float",
+            low=3.0,
+            high=30.0,
+            log=True,
+        ),
+        "min_sigma": ParamSpec(
+            param_type="float",
+            low=0.5,
+            high=10.0,
+            log=True,
+        ),
+        "sigma_bins": ParamSpec(
+            param_type="int",
+            low=4,
+            high=12,
+        ),
+        "min_bin_rows": ParamSpec(
+            param_type="int",
+            low=10,
+            high=100,
+        ),
+    }


 def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
     """
     Default search space for PlayerRatingGenerator.

-    Focuses on
+    Focuses on core parameters that have the most impact on performance.

     Returns:
         Dictionary mapping parameter names to ParamSpec objects
@@ -46,6 +167,31 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
             param_type="categorical",
             choices=["difference", "mean", "ignore_opponent"],
         ),
+        "start_league_quantile": ParamSpec(
+            param_type="float",
+            low=0.05,
+            high=0.5,
+        ),
+        "start_min_count_for_percentiles": ParamSpec(
+            param_type="int",
+            low=40,
+            high=500,
+        ),
+        "start_team_rating_subtract": ParamSpec(
+            param_type="float",
+            low=0.0,
+            high=200.0,
+        ),
+        "start_team_weight": ParamSpec(
+            param_type="float",
+            low=0.0,
+            high=1.0,
+        ),
+        "start_min_match_count_team_rating": ParamSpec(
+            param_type="int",
+            low=1,
+            high=10,
+        ),
     }


@@ -120,3 +266,15 @@ def get_default_search_space(
         f"Unsupported rating generator type: {type(rating_generator)}. "
         "Expected PlayerRatingGenerator or TeamRatingGenerator."
     )
+
+
+def get_default_estimator_search_space(estimator: object) -> dict[str, ParamSpec]:
+    if _is_lightgbm_estimator(estimator):
+        return get_default_lgbm_search_space()
+    if isinstance(estimator, NegativeBinomialEstimator):
+        return get_default_negative_binomial_search_space()
+    if isinstance(estimator, NormalDistributionPredictor):
+        return get_default_normal_distribution_search_space()
+    if isinstance(estimator, StudentTDistributionEstimator):
+        return get_default_student_t_search_space()
+    return {}
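The ParamSpec fields visible in these spaces (param_type, low, high, log, choices) map naturally onto Optuna's suggest API. The sketch below shows one plausible translation; how spforge's tuner actually consumes ParamSpec is not shown in this diff.

# Hypothetical translation of a ParamSpec dict into Optuna suggestions.
# This is an assumed consumption pattern, not spforge's actual tuner code.
import optuna

def suggest_params(trial: optuna.Trial, space: dict) -> dict:
    params = {}
    for name, spec in space.items():
        log = getattr(spec, "log", False)  # log appears optional in the diff
        if spec.param_type == "int":
            params[name] = trial.suggest_int(name, spec.low, spec.high, log=log)
        elif spec.param_type == "float":
            params[name] = trial.suggest_float(name, spec.low, spec.high, log=log)
        elif spec.param_type == "categorical":
            params[name] = trial.suggest_categorical(name, spec.choices)
    return params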