spforge 0.8.8__py3-none-any.whl → 0.8.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic.
- spforge/autopipeline.py +169 -5
- spforge/estimator/_group_by_estimator.py +11 -3
- spforge/hyperparameter_tuning/__init__.py +2 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +38 -23
- spforge/hyperparameter_tuning/_tuner.py +55 -2
- spforge/performance_transformers/_performance_manager.py +2 -4
- spforge/ratings/_player_rating.py +131 -28
- spforge/ratings/start_rating_generator.py +1 -1
- spforge/ratings/team_start_rating_generator.py +1 -1
- spforge/ratings/utils.py +16 -6
- spforge/scorer/_score.py +42 -11
- spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.8.dist-info → spforge-0.8.19.dist-info}/METADATA +1 -1
- {spforge-0.8.8.dist-info → spforge-0.8.19.dist-info}/RECORD +25 -23
- {spforge-0.8.8.dist-info → spforge-0.8.19.dist-info}/WHEEL +1 -1
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -4
- tests/hyperparameter_tuning/test_rating_tuner.py +157 -0
- tests/performance_transformers/test_performance_manager.py +15 -0
- tests/ratings/test_player_rating_generator.py +127 -0
- tests/ratings/test_player_rating_no_mutation.py +214 -0
- tests/ratings/test_utils_scaled_weights.py +136 -0
- tests/scorer/test_score.py +142 -0
- tests/test_autopipeline.py +336 -6
- {spforge-0.8.8.dist-info → spforge-0.8.19.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.8.dist-info → spforge-0.8.19.dist-info}/top_level.txt +0 -0
spforge/autopipeline.py
CHANGED
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
     return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))


+def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
+    """Recursively find innermost estimator with feature_importances_ or coef_."""
+    if hasattr(estimator, "feature_importances_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "feature_importances_")
+
+    if hasattr(estimator, "coef_"):
+        inner = _get_importance_estimator_inner(estimator)
+        if inner is not None:
+            return inner
+        return (estimator, "coef_")
+
+    return _get_importance_estimator_inner(estimator)
+
+
+def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
+    """Check wrapped estimators for importance attributes."""
+    # Check estimator_ (sklearn fitted wrapper convention)
+    if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
+        result = _get_importance_estimator(estimator.estimator_)
+        if result is not None:
+            return result
+
+    # Check _est (GroupByEstimator convention)
+    if hasattr(estimator, "_est") and estimator._est is not None:
+        result = _get_importance_estimator(estimator._est)
+        if result is not None:
+            return result
+
+    return None
+
+
 class AutoPipeline(BaseEstimator):
     def __init__(
         self,
@@ -202,6 +236,7 @@ class AutoPipeline(BaseEstimator):
         estimator_features: list[str],
         predictor_transformers: list[PredictorTransformer] | None = None,
         granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
         filters: list[Filter] | None = None,
         scale_features: bool = False,
         categorical_handling: CategoricalHandling = "auto",
@@ -216,6 +251,7 @@ class AutoPipeline(BaseEstimator):
         self.estimator_features = estimator_features
         self.feature_names = estimator_features # Internal compat
         self.granularity = granularity or []
+        self.aggregation_weight = aggregation_weight
         self.predictor_transformers = predictor_transformers
         self.estimator = estimator
         self.filters = filters or []
@@ -230,6 +266,7 @@ class AutoPipeline(BaseEstimator):
         self.numeric_features = numeric_features
         self.remainder = remainder
         self._cat_feats = []
+        self._filter_feature_names: list[str] = []

         # Auto-compute context features
         self.context_feature_names = self._compute_context_features()
@@ -242,11 +279,12 @@
         self._resolved_categorical_handling: CategoricalHandling | None = None

     def _compute_context_features(self) -> list[str]:
-        """Auto-compute context features from estimator
+        """Auto-compute context features from estimator and granularity.

         Note: Context from predictor_transformers is tracked separately in
         context_predictor_transformer_feature_names and is dropped before
-        the final estimator.
+        the final estimator. Filter columns are tracked separately and are
+        dropped before the final estimator.
         """
         from spforge.transformers._base import PredictorTransformer

@@ -290,9 +328,15 @@
         # Add granularity columns
         context.extend(self.granularity)

+        # Add aggregation weight column
+        if self.aggregation_weight:
+            context.append(self.aggregation_weight)
+
         # Add filter columns
+        self._filter_feature_names = []
         for f in self.filters:
-
+            if f.column_name not in self._filter_feature_names:
+                self._filter_feature_names.append(f.column_name)

         # Dedupe while preserving order, excluding estimator_features
         seen = set()
@@ -454,7 +498,11 @@
         pre = PreprocessorToDataFrame(pre_raw)

         est = (
-            GroupByEstimator(
+            GroupByEstimator(
+                self.estimator,
+                granularity=[f"{c}" for c in self.granularity],
+                aggregation_weight=self.aggregation_weight,
+            )
             if do_groupby
             else self.estimator
         )
@@ -506,8 +554,10 @@
         prev_transformer_feats_out.extend(feats_out)

         # Use FunctionTransformer with global function for serializability
+        drop_filter_cols = set(self._filter_feature_names)
+        drop_cols = drop_ctx_set | drop_filter_cols
         final = FunctionTransformer(
-            _drop_columns_transformer, validate=False, kw_args={"drop_cols":
+            _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_cols}
         )
         steps.append(("final", final))

@@ -538,6 +588,7 @@
                 self.feature_names
                 + self.context_feature_names
                 + self.context_predictor_transformer_feature_names
+                + self._filter_feature_names
                 + self.granularity
             )
         )
@@ -626,4 +677,117 @@
         if ctx not in all_features:
             all_features.append(ctx)

+        # Add filter columns (needed for fit-time filtering)
+        for col in self._filter_feature_names:
+            if col not in all_features:
+                all_features.append(col)
+
         return all_features
+
+    def _get_estimator_feature_names(self) -> list[str]:
+        """Get feature names as seen by the final estimator after all transformations."""
+        pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
+
+        # Remove context columns dropped by "final" step
+        final_step = self.sklearn_pipeline.named_steps["final"]
+        drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
+        features = [f for f in pre_out if f not in drop_cols]
+
+        # Remove granularity columns (dropped by GroupByEstimator)
+        granularity_set = set(self.granularity)
+        features = [f for f in features if f not in granularity_set]
+
+        # Remove context features (used by wrapper estimators, not inner model)
+        context_set = set(self.context_feature_names)
+        features = [f for f in features if f not in context_set]
+
+        # Remove filter columns (used only for fit-time filtering)
+        filter_set = set(self._filter_feature_names)
+        features = [f for f in features if f not in filter_set]
+
+        return features
+
+    def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
+        names = None
+        if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
+            names = list(estimator.feature_names_in_)
+        elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
+            names = list(estimator.feature_name_)
+        elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
+            names = list(estimator.feature_names_)
+        if names is None:
+            names = self._get_estimator_feature_names()
+        if len(names) != n_features:
+            raise ValueError(
+                f"Feature names length ({len(names)}) does not match importances length ({n_features})."
+            )
+        return names
+
+    @property
+    def feature_importances_(self) -> pd.DataFrame:
+        """Get feature importances from the fitted estimator.
+
+        Returns a DataFrame with columns ["feature", "importance"] sorted by
+        absolute importance descending. Works with tree-based models
+        (feature_importances_) and linear models (coef_).
+        """
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            # Linear models: use absolute value of coefficients
+            if raw.ndim == 2:
+                # Multi-class: average absolute values across classes
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        feature_names = self._get_estimator_feature_names()
+
+        df = pd.DataFrame({"feature": feature_names, "importance": importances})
+        df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
+        return df
+
+    @property
+    def feature_importance_names(self) -> dict[str, float]:
+        """Map deepest estimator feature names to importances."""
+        if self.sklearn_pipeline is None:
+            raise RuntimeError("Pipeline not fitted. Call fit() first.")
+
+        est = self.sklearn_pipeline.named_steps["est"]
+        result = _get_importance_estimator(est)
+
+        if result is None:
+            raise RuntimeError(
+                "Estimator does not support feature importances. "
+                "Requires feature_importances_ or coef_ attribute."
+            )
+
+        inner_est, attr_name = result
+        raw = getattr(inner_est, attr_name)
+
+        if attr_name == "coef_":
+            if raw.ndim == 2:
+                importances = np.abs(raw).mean(axis=0)
+            else:
+                importances = np.abs(raw)
+        else:
+            importances = raw
+
+        importances = np.asarray(importances)
+        feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
+        return dict(zip(feature_names, importances.tolist()))
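The autopipeline.py hunks above add an aggregation_weight argument (forwarded to GroupByEstimator) and two importance accessors. The following is a minimal usage sketch only: the LightGBM estimator, the column names, the training frame, and the fit(X, y) call signature are assumptions, not documented spforge API.

# Hypothetical sketch -- column names, estimator, and data are illustrative.
import pandas as pd
from lightgbm import LGBMRegressor
from spforge.autopipeline import AutoPipeline

df = pd.DataFrame(
    {
        "game_id": [1, 1, 2, 2],
        "team_id": ["a", "b", "a", "b"],
        "rating_diff": [10.0, -10.0, 5.0, -5.0],
        "minutes": [240, 240, 235, 240],
        "target": [1, 0, 1, 0],
    }
)

pipe = AutoPipeline(
    estimator=LGBMRegressor(),
    estimator_features=["rating_diff", "minutes"],
    granularity=["game_id", "team_id"],   # rows are reduced per group before fitting
    aggregation_weight="minutes",         # new in 0.8.19: weight column passed to GroupByEstimator
)
pipe.fit(df, df["target"])                # assumed fit(X, y) signature

print(pipe.feature_importances_)          # DataFrame["feature", "importance"], sorted by |importance|
print(pipe.feature_importance_names)      # dict: deepest-estimator feature name -> importance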
spforge/estimator/_group_by_estimator.py
CHANGED
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer


 class GroupByEstimator(BaseEstimator):
-    def __init__(
+    def __init__(
+        self,
+        estimator: Any,
+        granularity: list[str] | None = None,
+        aggregation_weight: str | None = None,
+    ):
         self.estimator = estimator
         self.granularity = granularity or []
-        self.
+        self.aggregation_weight = aggregation_weight
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
         self._est = None

     def __sklearn_is_fitted__(self):
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
     @nw.narwhalify
     def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
         X = X.to_pandas()
-
+        # Backwards compatibility: old pickled objects may not have aggregation_weight
+        agg_weight = getattr(self, "aggregation_weight", None)
+        self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
         X_red = nw.from_native(self._reducer.fit_transform(X))
         y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)

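GroupByEstimator itself now accepts the weight column and rebuilds its GroupByReducer inside fit(), so objects pickled before this change (without an aggregation_weight attribute) still fit cleanly. A hedged sketch of direct construction; the wrapped Ridge estimator and the column names are illustrative assumptions.

# Illustrative only -- Ridge and the column names are assumptions.
from sklearn.linear_model import Ridge
from spforge.estimator._group_by_estimator import GroupByEstimator

est = GroupByEstimator(
    Ridge(),
    granularity=["game_id", "team_id"],
    aggregation_weight="minutes",   # passed through to GroupByReducer
)
# est.fit(X, y) reduces X, y (and sample_weight) per group via GroupByReducer,
# then fits the wrapped estimator on the reduced frame; the reducer is rebuilt
# inside fit() from getattr(self, "aggregation_weight", None) for old pickles.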
spforge/hyperparameter_tuning/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from spforge.hyperparameter_tuning._default_search_spaces import (
     get_default_search_space,
     get_default_student_t_search_space,
     get_default_team_rating_search_space,
+    get_full_player_rating_search_space,
 )
 from spforge.hyperparameter_tuning._tuner import (
     EstimatorHyperparameterTuner,
@@ -28,4 +29,5 @@ __all__ = [
     "get_default_team_rating_search_space",
     "get_default_student_t_search_space",
     "get_default_search_space",
+    "get_full_player_rating_search_space",
 ]
spforge/hyperparameter_tuning/_default_search_spaces.py
CHANGED
@@ -128,6 +128,7 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
     Default search space for PlayerRatingGenerator.

     Focuses on core parameters that have the most impact on performance.
+    Excludes performance_predictor and team-based start rating params.

     Returns:
         Dictionary mapping parameter names to ParamSpec objects
@@ -163,10 +164,6 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
         "use_off_def_split": ParamSpec(
             param_type="bool",
         ),
-        "performance_predictor": ParamSpec(
-            param_type="categorical",
-            choices=["difference", "mean", "ignore_opponent"],
-        ),
         "start_league_quantile": ParamSpec(
             param_type="float",
             low=0.05,
@@ -177,24 +174,46 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
             low=40,
             high=500,
         ),
-        "start_team_rating_subtract": ParamSpec(
-            param_type="float",
-            low=0.0,
-            high=200.0,
-        ),
-        "start_team_weight": ParamSpec(
-            param_type="float",
-            low=0.0,
-            high=1.0,
-        ),
-        "start_min_match_count_team_rating": ParamSpec(
-            param_type="int",
-            low=1,
-            high=10,
-        ),
     }


+def get_full_player_rating_search_space() -> dict[str, ParamSpec]:
+    """
+    Full search space for PlayerRatingGenerator including all tunable parameters.
+
+    Includes performance_predictor and team-based start rating parameters.
+    Use this when you want to tune all parameters.
+
+    Returns:
+        Dictionary mapping parameter names to ParamSpec objects
+    """
+    base = get_default_player_rating_search_space()
+    base.update(
+        {
+            "performance_predictor": ParamSpec(
+                param_type="categorical",
+                choices=["difference", "mean", "ignore_opponent"],
+            ),
+            "start_team_rating_subtract": ParamSpec(
+                param_type="float",
+                low=0.0,
+                high=200.0,
+            ),
+            "start_team_weight": ParamSpec(
+                param_type="float",
+                low=0.0,
+                high=1.0,
+            ),
+            "start_min_match_count_team_rating": ParamSpec(
+                param_type="int",
+                low=1,
+                high=10,
+            ),
+        }
+    )
+    return base
+
+
 def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
     """
     Default search space for TeamRatingGenerator.
@@ -235,10 +254,6 @@ def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
         "use_off_def_split": ParamSpec(
             param_type="bool",
         ),
-        "performance_predictor": ParamSpec(
-            param_type="categorical",
-            choices=["difference", "mean", "ignore_opponent"],
-        ),
     }

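With the default player-rating space trimmed, get_full_player_rating_search_space() is the opt-in way to keep tuning performance_predictor and the team-based start-rating parameters. A small self-contained check of how the two spaces relate, using only the function names from this file:

from spforge.hyperparameter_tuning._default_search_spaces import (
    get_default_player_rating_search_space,
    get_full_player_rating_search_space,
)

default_space = get_default_player_rating_search_space()
full_space = get_full_player_rating_search_space()

# The full space is the default space plus the four params removed from the
# defaults in 0.8.19.
print(sorted(set(full_space) - set(default_space)))
# Expected: ['performance_predictor', 'start_min_match_count_team_rating',
#            'start_team_rating_subtract', 'start_team_weight']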
spforge/hyperparameter_tuning/_tuner.py
CHANGED
@@ -91,6 +91,9 @@ class RatingHyperparameterTuner:
         scorer: BaseScorer,
         direction: Literal["minimize", "maximize"],
         param_search_space: dict[str, ParamSpec] | None = None,
+        param_ranges: dict[str, tuple[float | int, float | int]] | None = None,
+        exclude_params: list[str] | None = None,
+        fixed_params: dict[str, Any] | None = None,
         n_trials: int = 50,
         n_jobs: int = 1,
         storage: str | None = None,
@@ -109,6 +112,14 @@
             scorer: Scorer for evaluation (must have score(df) -> float | dict)
             direction: "minimize" or "maximize"
             param_search_space: Custom search space (merges with defaults if provided)
+            param_ranges: Easy range override for float/int params. Maps param name to
+                (low, high) tuple. Preserves param_type and log scale from defaults.
+                Example: {"confidence_weight": (0.2, 1.0)}
+            exclude_params: List of param names to exclude from tuning entirely.
+                Example: ["performance_predictor", "use_off_def_split"]
+            fixed_params: Parameters to fix at specific values (not tuned).
+                These values are applied to the rating generator each trial.
+                Example: {"performance_predictor": "mean"}
             n_trials: Number of optimization trials
             n_jobs: Number of parallel jobs (1 = sequential)
             storage: Optuna storage URL (e.g., "sqlite:///optuna.db") for persistence
@@ -123,6 +134,9 @@
         self.scorer = scorer
         self.direction = direction
         self.custom_search_space = param_search_space
+        self.param_ranges = param_ranges
+        self.exclude_params = exclude_params or []
+        self.fixed_params = fixed_params or {}
         self.n_trials = n_trials
         self.n_jobs = n_jobs
         self.storage = storage
@@ -196,6 +210,9 @@
         try:
             copied_gen = copy.deepcopy(self.rating_generator)

+            for param_name, param_value in self.fixed_params.items():
+                setattr(copied_gen, param_name, param_value)
+
             trial_params = self._suggest_params(trial, search_space)

             for param_name, param_value in trial_params.items():
@@ -243,18 +260,54 @@
         defaults: dict[str, ParamSpec],
     ) -> dict[str, ParamSpec]:
         """
-        Merge custom search space with defaults
+        Merge custom search space with defaults.
+
+        Priority order (highest to lowest):
+        1. exclude_params - removes param entirely
+        2. fixed_params - removes from search (applied separately)
+        3. custom (param_search_space) - full ParamSpec override
+        4. param_ranges - updates only low/high bounds
+        5. defaults - base search space

         Args:
             custom: Custom search space (may be None)
             defaults: Default search space

         Returns:
-            Merged search space
+            Merged search space (excludes fixed_params, those are applied separately)
         """
         merged = defaults.copy()
+
+        if self.param_ranges:
+            for param_name, (low, high) in self.param_ranges.items():
+                if param_name not in merged:
+                    raise ValueError(
+                        f"param_ranges contains unknown parameter: '{param_name}'. "
+                        f"Available parameters: {list(merged.keys())}"
+                    )
+                existing = merged[param_name]
+                if existing.param_type not in ("float", "int"):
+                    raise ValueError(
+                        f"param_ranges can only override float/int parameters. "
+                        f"'{param_name}' is {existing.param_type}."
+                    )
+                merged[param_name] = ParamSpec(
+                    param_type=existing.param_type,
+                    low=low,
+                    high=high,
+                    log=existing.log,
+                    step=existing.step,
+                )
+
         if custom:
             merged.update(custom)
+
+        for param_name in self.exclude_params:
+            merged.pop(param_name, None)
+
+        for param_name in self.fixed_params:
+            merged.pop(param_name, None)
+
         return merged

     @staticmethod
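The tuner now layers four ways of shaping the search (exclude > fixed > custom ParamSpec > range override > defaults). A construction sketch using the example values from the docstring above; my_rating_generator and my_scorer are placeholders, and the exact name of the generator argument is an assumption inferred from self.rating_generator.

# Sketch only -- my_rating_generator and my_scorer are placeholders, not spforge API.
from spforge.hyperparameter_tuning._tuner import RatingHyperparameterTuner

tuner = RatingHyperparameterTuner(
    rating_generator=my_rating_generator,             # assumed parameter name
    scorer=my_scorer,                                 # BaseScorer with score(df) -> float | dict
    direction="minimize",
    param_ranges={"confidence_weight": (0.2, 1.0)},   # tighten a default float/int range
    exclude_params=["use_off_def_split"],             # drop from the search entirely
    fixed_params={"performance_predictor": "mean"},   # set each trial, never tuned
    n_trials=50,
)
# Per trial: fixed_params are applied to a deepcopy of the rating generator first,
# then the Optuna-suggested values from the merged search space are set on top.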
spforge/performance_transformers/_performance_manager.py
CHANGED
@@ -250,8 +250,6 @@ class PerformanceWeightsManager(PerformanceManager):
             )
         )

-        sum_weight = sum([w.weight for w in self.weights])
-
         for column_weight in self.weights:
             weight_col = f"weight__{column_weight.name}"
             feature_col = column_weight.name
@@ -261,14 +259,14 @@
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col)
+                        + (nw.col(weight_col) * (1 - nw.col(feature_name)))
                     ).alias(tmp_out_performance_colum_name)
                 )
             else:
                 df = df.with_columns(
                     (
                         nw.col(tmp_out_performance_colum_name)
-                        + (nw.col(weight_col)
+                        + (nw.col(weight_col) * nw.col(feature_name))
                     ).alias(tmp_out_performance_colum_name)
                 )

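The performance-manager change removes the sum_weight computation and makes the weighted sum multiply each weight column by the (possibly inverted) feature value. A toy recomputation of that sum in plain Python; spforge does the same arithmetic with narwhals expressions, and the inversion condition is not visible in this hunk, so the invert flags below are assumptions.

# Toy illustration of the weighted performance sum after this change.
weights = {"points": 0.7, "turnovers": 0.3}    # assumed pre-scaled column weights
values = {"points": 0.8, "turnovers": 0.4}     # assumed 0-1 scaled feature values
invert = {"points": False, "turnovers": True}  # lower-is-better columns use 1 - value

performance = 0.0
for name, weight in weights.items():
    value = 1 - values[name] if invert[name] else values[name]
    performance += weight * value
print(round(performance, 2))  # 0.7 * 0.8 + 0.3 * (1 - 0.4) = 0.74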