spforge 0.8.17__tar.gz → 0.8.19__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

Files changed (119)
  1. {spforge-0.8.17/spforge.egg-info → spforge-0.8.19}/PKG-INFO +1 -1
  2. {spforge-0.8.17 → spforge-0.8.19}/pyproject.toml +1 -1
  3. {spforge-0.8.17 → spforge-0.8.19}/spforge/autopipeline.py +11 -1
  4. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_group_by_estimator.py +11 -3
  5. {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/__init__.py +2 -0
  6. {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/_default_search_spaces.py +38 -23
  7. {spforge-0.8.17 → spforge-0.8.19}/spforge/hyperparameter_tuning/_tuner.py +55 -2
  8. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_other_transformer.py +38 -8
  9. {spforge-0.8.17 → spforge-0.8.19/spforge.egg-info}/PKG-INFO +1 -1
  10. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -4
  11. {spforge-0.8.17 → spforge-0.8.19}/tests/hyperparameter_tuning/test_rating_tuner.py +157 -0
  12. {spforge-0.8.17 → spforge-0.8.19}/tests/test_autopipeline.py +143 -7
  13. {spforge-0.8.17 → spforge-0.8.19}/LICENSE +0 -0
  14. {spforge-0.8.17 → spforge-0.8.19}/MANIFEST.in +0 -0
  15. {spforge-0.8.17 → spforge-0.8.19}/README.md +0 -0
  16. {spforge-0.8.17 → spforge-0.8.19}/examples/__init__.py +0 -0
  17. {spforge-0.8.17 → spforge-0.8.19}/examples/game_level_example.py +0 -0
  18. {spforge-0.8.17 → spforge-0.8.19}/examples/lol/__init__.py +0 -0
  19. {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/__init__.py +0 -0
  20. {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/subsample_lol_data.parquet +0 -0
  21. {spforge-0.8.17 → spforge-0.8.19}/examples/lol/data/utils.py +0 -0
  22. {spforge-0.8.17 → spforge-0.8.19}/examples/lol/pipeline_transformer_example.py +0 -0
  23. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/__init__.py +0 -0
  24. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/cross_validation_example.py +0 -0
  25. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/__init__.py +0 -0
  26. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/game_player_subsample.parquet +0 -0
  27. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/data/utils.py +0 -0
  28. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/feature_engineering_example.py +0 -0
  29. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/game_winner_example.py +0 -0
  30. {spforge-0.8.17 → spforge-0.8.19}/examples/nba/predictor_transformers_example.py +0 -0
  31. {spforge-0.8.17 → spforge-0.8.19}/setup.cfg +0 -0
  32. {spforge-0.8.17 → spforge-0.8.19}/spforge/__init__.py +0 -0
  33. {spforge-0.8.17 → spforge-0.8.19}/spforge/base_feature_generator.py +0 -0
  34. {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/__init__.py +0 -0
  35. {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/_base.py +0 -0
  36. {spforge-0.8.17 → spforge-0.8.19}/spforge/cross_validator/cross_validator.py +0 -0
  37. {spforge-0.8.17 → spforge-0.8.19}/spforge/data_structures.py +0 -0
  38. {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/__init__.py +0 -0
  39. {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_negative_binomial_estimator.py +0 -0
  40. {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_normal_distribution_predictor.py +0 -0
  41. {spforge-0.8.17 → spforge-0.8.19}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
  42. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/__init__.py +0 -0
  43. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_conditional_estimator.py +0 -0
  44. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
  45. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_granularity_estimator.py +0 -0
  46. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_ordinal_classifier.py +0 -0
  47. {spforge-0.8.17 → spforge-0.8.19}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
  48. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/__init__.py +0 -0
  49. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_base.py +0 -0
  50. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_lag.py +0 -0
  51. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_net_over_predicted.py +0 -0
  52. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
  53. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
  54. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
  55. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_mean_days.py +0 -0
  56. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_rolling_window.py +0 -0
  57. {spforge-0.8.17 → spforge-0.8.19}/spforge/feature_generator/_utils.py +0 -0
  58. {spforge-0.8.17 → spforge-0.8.19}/spforge/features_generator_pipeline.py +0 -0
  59. {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/__init__.py +0 -0
  60. {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/_performance_manager.py +0 -0
  61. {spforge-0.8.17 → spforge-0.8.19}/spforge/performance_transformers/_performances_transformers.py +0 -0
  62. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/__init__.py +0 -0
  63. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_base.py +0 -0
  64. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_player_rating.py +0 -0
  65. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/_team_rating.py +0 -0
  66. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/enums.py +0 -0
  67. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/league_identifier.py +0 -0
  68. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/league_start_rating_optimizer.py +0 -0
  69. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/player_performance_predictor.py +0 -0
  70. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/start_rating_generator.py +0 -0
  71. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/team_performance_predictor.py +0 -0
  72. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/team_start_rating_generator.py +0 -0
  73. {spforge-0.8.17 → spforge-0.8.19}/spforge/ratings/utils.py +0 -0
  74. {spforge-0.8.17 → spforge-0.8.19}/spforge/scorer/__init__.py +0 -0
  75. {spforge-0.8.17 → spforge-0.8.19}/spforge/scorer/_score.py +0 -0
  76. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/__init__.py +0 -0
  77. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_base.py +0 -0
  78. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_net_over_predicted.py +0 -0
  79. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_operator.py +0 -0
  80. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_predictor.py +0 -0
  81. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_simple_transformer.py +0 -0
  82. {spforge-0.8.17 → spforge-0.8.19}/spforge/transformers/_team_ratio_predictor.py +0 -0
  83. {spforge-0.8.17 → spforge-0.8.19}/spforge/utils.py +0 -0
  84. {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/SOURCES.txt +0 -0
  85. {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/dependency_links.txt +0 -0
  86. {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/requires.txt +0 -0
  87. {spforge-0.8.17 → spforge-0.8.19}/spforge.egg-info/top_level.txt +0 -0
  88. {spforge-0.8.17 → spforge-0.8.19}/tests/cross_validator/test_cross_validator.py +0 -0
  89. {spforge-0.8.17 → spforge-0.8.19}/tests/distributions/test_distribution.py +0 -0
  90. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
  91. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
  92. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_lol_player_kills.py +0 -0
  93. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_player_points.py +0 -0
  94. {spforge-0.8.17 → spforge-0.8.19}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
  95. {spforge-0.8.17 → spforge-0.8.19}/tests/estimator/test_sklearn_estimator.py +0 -0
  96. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_lag.py +0 -0
  97. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
  98. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
  99. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
  100. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_mean_days.py +0 -0
  101. {spforge-0.8.17 → spforge-0.8.19}/tests/feature_generator/test_rolling_window.py +0 -0
  102. {spforge-0.8.17 → spforge-0.8.19}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
  103. {spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performance_manager.py +0 -0
  104. {spforge-0.8.17 → spforge-0.8.19}/tests/performance_transformers/test_performances_transformers.py +0 -0
  105. {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_player_rating_generator.py +0 -0
  106. {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_player_rating_no_mutation.py +0 -0
  107. {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_ratings_property.py +0 -0
  108. {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_team_rating_generator.py +0 -0
  109. {spforge-0.8.17 → spforge-0.8.19}/tests/ratings/test_utils_scaled_weights.py +0 -0
  110. {spforge-0.8.17 → spforge-0.8.19}/tests/scorer/test_score.py +0 -0
  111. {spforge-0.8.17 → spforge-0.8.19}/tests/scorer/test_score_aggregation_granularity.py +0 -0
  112. {spforge-0.8.17 → spforge-0.8.19}/tests/test_autopipeline_context.py +0 -0
  113. {spforge-0.8.17 → spforge-0.8.19}/tests/test_feature_generator_pipeline.py +0 -0
  114. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_estimator_transformer_context.py +0 -0
  115. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_net_over_predicted.py +0 -0
  116. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_other_transformer.py +0 -0
  117. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_predictor_transformer.py +0 -0
  118. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_simple_transformer.py +0 -0
  119. {spforge-0.8.17 → spforge-0.8.19}/tests/transformers/test_team_ratio_predictor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.17
3
+ Version: 0.8.19
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spforge"
7
- version = "0.8.17"
7
+ version = "0.8.19"
8
8
  description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -236,6 +236,7 @@ class AutoPipeline(BaseEstimator):
236
236
  estimator_features: list[str],
237
237
  predictor_transformers: list[PredictorTransformer] | None = None,
238
238
  granularity: list[str] | None = None,
239
+ aggregation_weight: str | None = None,
239
240
  filters: list[Filter] | None = None,
240
241
  scale_features: bool = False,
241
242
  categorical_handling: CategoricalHandling = "auto",
@@ -250,6 +251,7 @@ class AutoPipeline(BaseEstimator):
250
251
  self.estimator_features = estimator_features
251
252
  self.feature_names = estimator_features # Internal compat
252
253
  self.granularity = granularity or []
254
+ self.aggregation_weight = aggregation_weight
253
255
  self.predictor_transformers = predictor_transformers
254
256
  self.estimator = estimator
255
257
  self.filters = filters or []
@@ -326,6 +328,10 @@ class AutoPipeline(BaseEstimator):
326
328
  # Add granularity columns
327
329
  context.extend(self.granularity)
328
330
 
331
+ # Add aggregation weight column
332
+ if self.aggregation_weight:
333
+ context.append(self.aggregation_weight)
334
+
329
335
  # Add filter columns
330
336
  self._filter_feature_names = []
331
337
  for f in self.filters:
@@ -492,7 +498,11 @@ class AutoPipeline(BaseEstimator):
492
498
  pre = PreprocessorToDataFrame(pre_raw)
493
499
 
494
500
  est = (
495
- GroupByEstimator(self.estimator, granularity=[f"{c}" for c in self.granularity])
501
+ GroupByEstimator(
502
+ self.estimator,
503
+ granularity=[f"{c}" for c in self.granularity],
504
+ aggregation_weight=self.aggregation_weight,
505
+ )
496
506
  if do_groupby
497
507
  else self.estimator
498
508
  )
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer
10
10
 
11
11
 
12
12
  class GroupByEstimator(BaseEstimator):
13
- def __init__(self, estimator: Any, granularity: list[str] | None = None):
13
+ def __init__(
14
+ self,
15
+ estimator: Any,
16
+ granularity: list[str] | None = None,
17
+ aggregation_weight: str | None = None,
18
+ ):
14
19
  self.estimator = estimator
15
20
  self.granularity = granularity or []
16
- self._reducer = GroupByReducer(self.granularity)
21
+ self.aggregation_weight = aggregation_weight
22
+ self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
17
23
  self._est = None
18
24
 
19
25
  def __sklearn_is_fitted__(self):
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
22
28
  @nw.narwhalify
23
29
  def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
24
30
  X = X.to_pandas()
25
- self._reducer = GroupByReducer(self.granularity)
31
+ # Backwards compatibility: old pickled objects may not have aggregation_weight
32
+ agg_weight = getattr(self, "aggregation_weight", None)
33
+ self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
26
34
  X_red = nw.from_native(self._reducer.fit_transform(X))
27
35
  y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)
28
36
 
@@ -7,6 +7,7 @@ from spforge.hyperparameter_tuning._default_search_spaces import (
7
7
  get_default_search_space,
8
8
  get_default_student_t_search_space,
9
9
  get_default_team_rating_search_space,
10
+ get_full_player_rating_search_space,
10
11
  )
11
12
  from spforge.hyperparameter_tuning._tuner import (
12
13
  EstimatorHyperparameterTuner,
@@ -28,4 +29,5 @@ __all__ = [
28
29
  "get_default_team_rating_search_space",
29
30
  "get_default_student_t_search_space",
30
31
  "get_default_search_space",
32
+ "get_full_player_rating_search_space",
31
33
  ]
@@ -128,6 +128,7 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
128
128
  Default search space for PlayerRatingGenerator.
129
129
 
130
130
  Focuses on core parameters that have the most impact on performance.
131
+ Excludes performance_predictor and team-based start rating params.
131
132
 
132
133
  Returns:
133
134
  Dictionary mapping parameter names to ParamSpec objects
@@ -163,10 +164,6 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
163
164
  "use_off_def_split": ParamSpec(
164
165
  param_type="bool",
165
166
  ),
166
- "performance_predictor": ParamSpec(
167
- param_type="categorical",
168
- choices=["difference", "mean", "ignore_opponent"],
169
- ),
170
167
  "start_league_quantile": ParamSpec(
171
168
  param_type="float",
172
169
  low=0.05,
@@ -177,24 +174,46 @@ def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
177
174
  low=40,
178
175
  high=500,
179
176
  ),
180
- "start_team_rating_subtract": ParamSpec(
181
- param_type="float",
182
- low=0.0,
183
- high=200.0,
184
- ),
185
- "start_team_weight": ParamSpec(
186
- param_type="float",
187
- low=0.0,
188
- high=1.0,
189
- ),
190
- "start_min_match_count_team_rating": ParamSpec(
191
- param_type="int",
192
- low=1,
193
- high=10,
194
- ),
195
177
  }
196
178
 
197
179
 
180
+ def get_full_player_rating_search_space() -> dict[str, ParamSpec]:
181
+ """
182
+ Full search space for PlayerRatingGenerator including all tunable parameters.
183
+
184
+ Includes performance_predictor and team-based start rating parameters.
185
+ Use this when you want to tune all parameters.
186
+
187
+ Returns:
188
+ Dictionary mapping parameter names to ParamSpec objects
189
+ """
190
+ base = get_default_player_rating_search_space()
191
+ base.update(
192
+ {
193
+ "performance_predictor": ParamSpec(
194
+ param_type="categorical",
195
+ choices=["difference", "mean", "ignore_opponent"],
196
+ ),
197
+ "start_team_rating_subtract": ParamSpec(
198
+ param_type="float",
199
+ low=0.0,
200
+ high=200.0,
201
+ ),
202
+ "start_team_weight": ParamSpec(
203
+ param_type="float",
204
+ low=0.0,
205
+ high=1.0,
206
+ ),
207
+ "start_min_match_count_team_rating": ParamSpec(
208
+ param_type="int",
209
+ low=1,
210
+ high=10,
211
+ ),
212
+ }
213
+ )
214
+ return base
215
+
216
+
198
217
  def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
199
218
  """
200
219
  Default search space for TeamRatingGenerator.
@@ -235,10 +254,6 @@ def get_default_team_rating_search_space() -> dict[str, ParamSpec]:
235
254
  "use_off_def_split": ParamSpec(
236
255
  param_type="bool",
237
256
  ),
238
- "performance_predictor": ParamSpec(
239
- param_type="categorical",
240
- choices=["difference", "mean", "ignore_opponent"],
241
- ),
242
257
  }
243
258
 
244
259
 
@@ -91,6 +91,9 @@ class RatingHyperparameterTuner:
91
91
  scorer: BaseScorer,
92
92
  direction: Literal["minimize", "maximize"],
93
93
  param_search_space: dict[str, ParamSpec] | None = None,
94
+ param_ranges: dict[str, tuple[float | int, float | int]] | None = None,
95
+ exclude_params: list[str] | None = None,
96
+ fixed_params: dict[str, Any] | None = None,
94
97
  n_trials: int = 50,
95
98
  n_jobs: int = 1,
96
99
  storage: str | None = None,
@@ -109,6 +112,14 @@ class RatingHyperparameterTuner:
109
112
  scorer: Scorer for evaluation (must have score(df) -> float | dict)
110
113
  direction: "minimize" or "maximize"
111
114
  param_search_space: Custom search space (merges with defaults if provided)
115
+ param_ranges: Easy range override for float/int params. Maps param name to
116
+ (low, high) tuple. Preserves param_type and log scale from defaults.
117
+ Example: {"confidence_weight": (0.2, 1.0)}
118
+ exclude_params: List of param names to exclude from tuning entirely.
119
+ Example: ["performance_predictor", "use_off_def_split"]
120
+ fixed_params: Parameters to fix at specific values (not tuned).
121
+ These values are applied to the rating generator each trial.
122
+ Example: {"performance_predictor": "mean"}
112
123
  n_trials: Number of optimization trials
113
124
  n_jobs: Number of parallel jobs (1 = sequential)
114
125
  storage: Optuna storage URL (e.g., "sqlite:///optuna.db") for persistence
@@ -123,6 +134,9 @@ class RatingHyperparameterTuner:
123
134
  self.scorer = scorer
124
135
  self.direction = direction
125
136
  self.custom_search_space = param_search_space
137
+ self.param_ranges = param_ranges
138
+ self.exclude_params = exclude_params or []
139
+ self.fixed_params = fixed_params or {}
126
140
  self.n_trials = n_trials
127
141
  self.n_jobs = n_jobs
128
142
  self.storage = storage
@@ -196,6 +210,9 @@ class RatingHyperparameterTuner:
196
210
  try:
197
211
  copied_gen = copy.deepcopy(self.rating_generator)
198
212
 
213
+ for param_name, param_value in self.fixed_params.items():
214
+ setattr(copied_gen, param_name, param_value)
215
+
199
216
  trial_params = self._suggest_params(trial, search_space)
200
217
 
201
218
  for param_name, param_value in trial_params.items():
@@ -243,18 +260,54 @@ class RatingHyperparameterTuner:
243
260
  defaults: dict[str, ParamSpec],
244
261
  ) -> dict[str, ParamSpec]:
245
262
  """
246
- Merge custom search space with defaults (custom takes precedence).
263
+ Merge custom search space with defaults.
264
+
265
+ Priority order (highest to lowest):
266
+ 1. exclude_params - removes param entirely
267
+ 2. fixed_params - removes from search (applied separately)
268
+ 3. custom (param_search_space) - full ParamSpec override
269
+ 4. param_ranges - updates only low/high bounds
270
+ 5. defaults - base search space
247
271
 
248
272
  Args:
249
273
  custom: Custom search space (may be None)
250
274
  defaults: Default search space
251
275
 
252
276
  Returns:
253
- Merged search space
277
+ Merged search space (excludes fixed_params, those are applied separately)
254
278
  """
255
279
  merged = defaults.copy()
280
+
281
+ if self.param_ranges:
282
+ for param_name, (low, high) in self.param_ranges.items():
283
+ if param_name not in merged:
284
+ raise ValueError(
285
+ f"param_ranges contains unknown parameter: '{param_name}'. "
286
+ f"Available parameters: {list(merged.keys())}"
287
+ )
288
+ existing = merged[param_name]
289
+ if existing.param_type not in ("float", "int"):
290
+ raise ValueError(
291
+ f"param_ranges can only override float/int parameters. "
292
+ f"'{param_name}' is {existing.param_type}."
293
+ )
294
+ merged[param_name] = ParamSpec(
295
+ param_type=existing.param_type,
296
+ low=low,
297
+ high=high,
298
+ log=existing.log,
299
+ step=existing.step,
300
+ )
301
+
256
302
  if custom:
257
303
  merged.update(custom)
304
+
305
+ for param_name in self.exclude_params:
306
+ merged.pop(param_name, None)
307
+
308
+ for param_name in self.fixed_params:
309
+ merged.pop(param_name, None)
310
+
258
311
  return merged
259
312
 
260
313
  @staticmethod
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
8
8
 
9
9
 
10
10
  class GroupByReducer(BaseEstimator, TransformerMixin):
11
- def __init__(self, granularity: list[str]):
11
+ def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
12
12
  self.granularity = granularity
13
+ self.aggregation_weight = aggregation_weight
13
14
 
14
15
  @nw.narwhalify
15
16
  def fit(self, X: IntoFrameT, y: Any = None):
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
26
27
  raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
27
28
 
28
29
  non_keys = [c for c in df.columns if c not in keys]
29
- num_cols = [c for c in non_keys if pd.api.types.is_numeric_dtype(df[c])]
30
+ schema = df.schema
31
+ num_cols = [c for c in non_keys if schema[c].is_numeric()]
30
32
  other_cols = [c for c in non_keys if c not in num_cols]
31
33
 
32
34
  aggs: list[nw.Expr] = []
33
35
 
36
+ # Backwards compatibility: old pickled objects may not have aggregation_weight
37
+ weight_col = getattr(self, "aggregation_weight", None)
38
+ has_weight = weight_col and weight_col in df.columns
39
+
34
40
  for c in num_cols:
35
- aggs.append(nw.col(c).mean().alias(c))
41
+ if c == weight_col:
42
+ aggs.append(nw.col(c).sum().alias(c))
43
+ elif has_weight:
44
+ aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
45
+ aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
46
+ else:
47
+ aggs.append(nw.col(c).mean().alias(c))
36
48
 
37
49
  for c in other_cols:
38
50
  aggs.append(nw.col(c).first().alias(c))
39
51
 
52
+ if has_weight:
53
+ aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
54
+
40
55
  out = df.group_by(keys).agg(aggs)
56
+
57
+ if has_weight:
58
+ weighted_cols = [c for c in num_cols if c != weight_col]
59
+ for c in weighted_cols:
60
+ out = out.with_columns(
61
+ nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
62
+ .then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
63
+ .otherwise(nw.col(f"__{c}_fallback"))
64
+ .alias(c)
65
+ )
66
+ drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
67
+ drop_cols += [f"__{c}_fallback" for c in weighted_cols]
68
+ drop_cols.append("__weight_sum")
69
+ out = out.drop(drop_cols)
70
+
41
71
  return out
42
72
 
43
73
  @nw.narwhalify
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
59
89
  if sample_weight is not None:
60
90
  df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
61
91
 
62
- y_is_numeric = df.select(nw.col("__y")).schema["__y"].is_numeric()
92
+ y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
93
+ non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
94
+ if len(non_uniform) > 0:
95
+ raise ValueError("Target (y) must be uniform within each granularity group")
63
96
 
64
- if y_is_numeric:
65
- agg_exprs = [nw.col("__y").mean().alias("__y")]
66
- else:
67
- agg_exprs = [nw.col("__y").first().alias("__y")]
97
+ agg_exprs = [nw.col("__y").first().alias("__y")]
68
98
 
69
99
  if sample_weight is not None:
70
100
  agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.17
3
+ Version: 0.8.19
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -96,12 +96,8 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
96
96
  "confidence_value_denom",
97
97
  "confidence_max_sum",
98
98
  "use_off_def_split",
99
- "performance_predictor",
100
- "start_team_weight",
101
99
  "start_league_quantile",
102
100
  "start_min_count_for_percentiles",
103
- "start_min_match_count_team_rating",
104
- "start_team_rating_subtract",
105
101
  }
106
102
  assert set(result.best_params.keys()) == expected_params
107
103
 
@@ -454,3 +454,160 @@ def test_param_spec__categorical_requires_choices():
454
454
 
455
455
  with pytest.raises(ValueError, match="requires choices"):
456
456
  spec.suggest(trial, "test_param")
457
+
458
+
459
+ def test_param_ranges__overrides_bounds(
460
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
461
+ ):
462
+ """Test that param_ranges overrides low/high bounds while preserving param_type."""
463
+ tuner = RatingHyperparameterTuner(
464
+ rating_generator=player_rating_generator,
465
+ cross_validator=cross_validator,
466
+ scorer=scorer,
467
+ direction="minimize",
468
+ param_ranges={
469
+ "confidence_weight": (0.2, 0.3),
470
+ },
471
+ n_trials=3,
472
+ show_progress_bar=False,
473
+ )
474
+
475
+ result = tuner.optimize(sample_player_df_pd)
476
+
477
+ assert "confidence_weight" in result.best_params
478
+ assert 0.2 <= result.best_params["confidence_weight"] <= 0.3
479
+
480
+
481
+ def test_exclude_params__removes_from_search(
482
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
483
+ ):
484
+ """Test that exclude_params removes parameters from search space."""
485
+ tuner = RatingHyperparameterTuner(
486
+ rating_generator=player_rating_generator,
487
+ cross_validator=cross_validator,
488
+ scorer=scorer,
489
+ direction="minimize",
490
+ exclude_params=["use_off_def_split", "confidence_weight"],
491
+ n_trials=3,
492
+ show_progress_bar=False,
493
+ )
494
+
495
+ result = tuner.optimize(sample_player_df_pd)
496
+
497
+ assert "use_off_def_split" not in result.best_params
498
+ assert "confidence_weight" not in result.best_params
499
+ assert "rating_change_multiplier_offense" in result.best_params
500
+
501
+
502
+ def test_fixed_params__applies_values_without_tuning(
503
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
504
+ ):
505
+ """Test that fixed_params sets values without including in search space."""
506
+ tuner = RatingHyperparameterTuner(
507
+ rating_generator=player_rating_generator,
508
+ cross_validator=cross_validator,
509
+ scorer=scorer,
510
+ direction="minimize",
511
+ fixed_params={"use_off_def_split": False},
512
+ n_trials=3,
513
+ show_progress_bar=False,
514
+ )
515
+
516
+ result = tuner.optimize(sample_player_df_pd)
517
+
518
+ assert "use_off_def_split" not in result.best_params
519
+
520
+
521
+ def test_param_ranges__unknown_param_raises_error(
522
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
523
+ ):
524
+ """Test that param_ranges with unknown param raises ValueError."""
525
+ tuner = RatingHyperparameterTuner(
526
+ rating_generator=player_rating_generator,
527
+ cross_validator=cross_validator,
528
+ scorer=scorer,
529
+ direction="minimize",
530
+ param_ranges={"nonexistent_param": (0.0, 1.0)},
531
+ n_trials=3,
532
+ show_progress_bar=False,
533
+ )
534
+
535
+ with pytest.raises(ValueError, match="unknown parameter"):
536
+ tuner.optimize(sample_player_df_pd)
537
+
538
+
539
+ def test_param_ranges__non_numeric_param_raises_error(
540
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
541
+ ):
542
+ """Test that param_ranges on non-float/int param raises ValueError."""
543
+ tuner = RatingHyperparameterTuner(
544
+ rating_generator=player_rating_generator,
545
+ cross_validator=cross_validator,
546
+ scorer=scorer,
547
+ direction="minimize",
548
+ param_ranges={"use_off_def_split": (0, 1)},
549
+ n_trials=3,
550
+ show_progress_bar=False,
551
+ )
552
+
553
+ with pytest.raises(ValueError, match="can only override float/int"):
554
+ tuner.optimize(sample_player_df_pd)
555
+
556
+
557
+ def test_combined_api__param_ranges_exclude_fixed(
558
+ player_rating_generator, cross_validator, scorer, sample_player_df_pd
559
+ ):
560
+ """Test using param_ranges, exclude_params, and fixed_params together."""
561
+ tuner = RatingHyperparameterTuner(
562
+ rating_generator=player_rating_generator,
563
+ cross_validator=cross_validator,
564
+ scorer=scorer,
565
+ direction="minimize",
566
+ param_ranges={
567
+ "confidence_weight": (0.2, 1.0),
568
+ "rating_change_multiplier_offense": (10.0, 150.0),
569
+ },
570
+ exclude_params=["start_league_quantile"],
571
+ fixed_params={"use_off_def_split": False},
572
+ n_trials=3,
573
+ show_progress_bar=False,
574
+ )
575
+
576
+ result = tuner.optimize(sample_player_df_pd)
577
+
578
+ assert 0.2 <= result.best_params["confidence_weight"] <= 1.0
579
+ assert 10.0 <= result.best_params["rating_change_multiplier_offense"] <= 150.0
580
+ assert "start_league_quantile" not in result.best_params
581
+ assert "use_off_def_split" not in result.best_params
582
+
583
+
584
+ def test_default_search_space__excludes_performance_predictor_and_team_start(
585
+ player_rating_generator,
586
+ ):
587
+ """Test that performance_predictor and team start params are not in default search space."""
588
+ from spforge.hyperparameter_tuning._default_search_spaces import (
589
+ get_default_search_space,
590
+ )
591
+
592
+ defaults = get_default_search_space(player_rating_generator)
593
+
594
+ assert "performance_predictor" not in defaults
595
+ assert "start_team_rating_subtract" not in defaults
596
+ assert "start_team_weight" not in defaults
597
+ assert "start_min_match_count_team_rating" not in defaults
598
+
599
+
600
+ def test_full_player_rating_search_space__includes_all_params():
601
+ """Test that full search space includes performance_predictor and team start params."""
602
+ from spforge.hyperparameter_tuning._default_search_spaces import (
603
+ get_full_player_rating_search_space,
604
+ )
605
+
606
+ full = get_full_player_rating_search_space()
607
+
608
+ assert "performance_predictor" in full
609
+ assert "start_team_rating_subtract" in full
610
+ assert "start_team_weight" in full
611
+ assert "start_min_match_count_team_rating" in full
612
+ assert "rating_change_multiplier_offense" in full
613
+ assert "confidence_weight" in full
@@ -328,7 +328,18 @@ def test_infer_categorical_from_feature_names_when_only_numeric_features_given(d
328
328
  assert any(c.startswith("cat") for c in cap.fit_columns)
329
329
 
330
330
 
331
- def test_granularity_groups_rows_before_estimator_fit_and_predict(df_reg):
331
+ def test_granularity_groups_rows_before_estimator_fit_and_predict(frame):
332
+ df_pd = pd.DataFrame(
333
+ {
334
+ "gameid": ["g1", "g1", "g2", "g2", "g3", "g3"],
335
+ "num1": [1.0, 2.0, np.nan, 4.0, 5.0, 6.0],
336
+ "num2": [10.0, 20.0, 30.0, 40.0, np.nan, 60.0],
337
+ "cat1": ["a", "b", "a", None, "b", "c"],
338
+ "y": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
339
+ }
340
+ )
341
+ df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
342
+
332
343
  model = AutoPipeline(
333
344
  estimator=CaptureEstimator(),
334
345
  estimator_features=["gameid", "num1", "num2", "cat1"],
@@ -339,16 +350,16 @@ def test_granularity_groups_rows_before_estimator_fit_and_predict(df_reg):
339
350
  remainder="drop",
340
351
  )
341
352
 
342
- X = _select(df_reg, ["gameid", "num1", "num2", "cat1"])
343
- y = _col(df_reg, "y")
353
+ X = _select(df, ["gameid", "num1", "num2", "cat1"])
354
+ y = _col(df, "y")
344
355
  model.fit(X, y=y)
345
356
 
346
357
  inner = _inner_estimator(model)
347
358
 
348
- if isinstance(df_reg, pl.DataFrame):
349
- n_groups = df_reg.select(pl.col("gameid").n_unique()).item()
359
+ if isinstance(df, pl.DataFrame):
360
+ n_groups = df.select(pl.col("gameid").n_unique()).item()
350
361
  else:
351
- n_groups = df_reg["gameid"].nunique()
362
+ n_groups = df["gameid"].nunique()
352
363
 
353
364
  assert inner.fit_shape[0] == n_groups
354
365
 
@@ -724,9 +735,10 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
724
735
  "gameid": ["g1", "g1", "g2", "g2"],
725
736
  "num1": [1.0, 2.0, 3.0, 4.0],
726
737
  "num2": [10.0, 20.0, 30.0, 40.0],
738
+ "y": [1.0, 1.0, 2.0, 2.0],
727
739
  }
728
740
  )
729
- y = pd.Series([1.0, 2.0, 3.0, 4.0], name="y")
741
+ y = df["y"]
730
742
 
731
743
  model = AutoPipeline(
732
744
  estimator=RandomForestRegressor(n_estimators=5, random_state=42),
@@ -745,3 +757,127 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
745
757
  assert list(names.keys()) == list(inner.feature_names_in_)
746
758
  assert "gameid" not in names
747
759
  assert "const_pred" in names
760
+
761
+
762
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_granularity_with_aggregation_weight__features_weighted(frame):
    """Fit and predict an AutoPipeline that uses granularity plus aggregation_weight.

    Checks that the captured fit shape has two rows (the input holds two
    distinct gameids) and that predict returns one value per input row.
    """
    columns = {
        "gameid": ["g1", "g1", "g2", "g2"],
        "num1": [10.0, 30.0, 20.0, 40.0],
        "weight": [0.25, 0.75, 0.5, 0.5],
        "y": [1.0, 1.0, 2.0, 2.0],
    }
    pandas_df = pd.DataFrame(columns)
    df = pl.from_pandas(pandas_df) if frame != "pd" else pandas_df

    capture_estimator = CaptureEstimator()
    pipeline_kwargs = dict(
        estimator=capture_estimator,
        estimator_features=["num1"],
        granularity=["gameid"],
        aggregation_weight="weight",
        remainder="drop",
    )
    model = AutoPipeline(**pipeline_kwargs)

    X = _select(df, ["gameid", "num1", "weight"])
    target = _col(df, "y")
    model.fit(X, y=target)

    fitted_inner = _inner_estimator(model)
    assert fitted_inner.fit_shape[0] == 2

    predictions = model.predict(X)
    assert predictions.shape[0] == len(X)
792
+
793
+
794
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_granularity_aggregation_weight__weighted_mean_correct(frame):
    """GroupByReducer with aggregation_weight reduces num1 to its weight-weighted mean."""
    from spforge.transformers._other_transformer import GroupByReducer

    columns = {
        "gameid": ["g1", "g1"],
        "num1": [10.0, 30.0],
        "weight": [0.25, 0.75],
        "y": [1.0, 1.0],
    }
    pandas_df = pd.DataFrame(columns)
    df = pl.from_pandas(pandas_df) if frame != "pd" else pandas_df

    reducer = GroupByReducer(granularity=["gameid"], aggregation_weight="weight")
    reduced = reducer.fit_transform(df)

    # Read the first reduced value using the accessor of the active backend.
    num1_column = reduced["num1"]
    num1_val = num1_column.to_list()[0] if frame == "pl" else num1_column.iloc[0]

    expected = (10.0 * 0.25 + 30.0 * 0.75) / (0.25 + 0.75)
    assert abs(num1_val - expected) < 1e-6
818
+
819
+
820
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_reduce_y_raises_when_target_not_uniform_per_group(frame):
    """reduce_y raises ValueError when rows of one group carry different targets."""
    from spforge.transformers._other_transformer import GroupByReducer

    pandas_df = pd.DataFrame(
        {
            "gameid": ["g1", "g1"],
            "num1": [10.0, 30.0],
        }
    )
    df = pl.from_pandas(pandas_df) if frame != "pd" else pandas_df

    # Both rows belong to "g1" but their targets differ.
    non_uniform_target = np.array([1.0, 2.0])
    reducer = GroupByReducer(granularity=["gameid"])

    with pytest.raises(ValueError, match="Target.*must be uniform"):
        reducer.reduce_y(df, non_uniform_target)
837
+
838
+
839
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_reduce_y_works_when_target_uniform_per_group(frame):
    """reduce_y returns one target per group when each group's targets agree."""
    from spforge.transformers._other_transformer import GroupByReducer

    pandas_df = pd.DataFrame(
        {
            "gameid": ["g1", "g1", "g2", "g2"],
            "num1": [10.0, 30.0, 20.0, 40.0],
        }
    )
    df = pl.from_pandas(pandas_df) if frame != "pd" else pandas_df

    # Targets are constant within each gameid.
    uniform_target = np.array([1.0, 1.0, 2.0, 2.0])
    reducer = GroupByReducer(granularity=["gameid"])

    reduced = reducer.reduce_y(df, uniform_target)
    y_out, _ = reduced

    assert len(y_out) == 2
    assert set(y_out) == {1.0, 2.0}
858
+
859
+
860
@pytest.mark.parametrize("frame", ["pd", "pl"])
def test_aggregation_weight_sums_weight_column(frame):
    """GroupByReducer reduces the aggregation weight column to the group's weight sum."""
    from spforge.transformers._other_transformer import GroupByReducer

    columns = {
        "gameid": ["g1", "g1"],
        "num1": [10.0, 30.0],
        "weight": [0.25, 0.75],
        "y": [1.0, 1.0],
    }
    pandas_df = pd.DataFrame(columns)
    df = pl.from_pandas(pandas_df) if frame != "pd" else pandas_df

    reducer = GroupByReducer(granularity=["gameid"], aggregation_weight="weight")
    reduced = reducer.fit_transform(df)

    # Read the first reduced value using the accessor of the active backend.
    weight_column = reduced["weight"]
    weight_val = weight_column.to_list()[0] if frame == "pl" else weight_column.iloc[0]

    expected = 0.25 + 0.75
    assert abs(weight_val - expected) < 1e-6
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes