spforge 0.8.4__tar.gz → 0.8.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. (The original page links to further details; the link target is not preserved in this text.)

Files changed (115)
  1. {spforge-0.8.4/spforge.egg-info → spforge-0.8.5}/PKG-INFO +2 -2
  2. {spforge-0.8.4 → spforge-0.8.5}/pyproject.toml +2 -2
  3. {spforge-0.8.4 → spforge-0.8.5}/spforge/__init__.py +1 -0
  4. {spforge-0.8.4 → spforge-0.8.5}/spforge/hyperparameter_tuning/__init__.py +12 -0
  5. {spforge-0.8.4 → spforge-0.8.5}/spforge/hyperparameter_tuning/_default_search_spaces.py +133 -0
  6. {spforge-0.8.4 → spforge-0.8.5}/spforge/hyperparameter_tuning/_tuner.py +192 -0
  7. {spforge-0.8.4 → spforge-0.8.5/spforge.egg-info}/PKG-INFO +2 -2
  8. {spforge-0.8.4 → spforge-0.8.5}/spforge.egg-info/SOURCES.txt +2 -0
  9. {spforge-0.8.4 → spforge-0.8.5}/spforge.egg-info/requires.txt +1 -1
  10. spforge-0.8.5/tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
  11. spforge-0.8.5/tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
  12. {spforge-0.8.4 → spforge-0.8.5}/LICENSE +0 -0
  13. {spforge-0.8.4 → spforge-0.8.5}/MANIFEST.in +0 -0
  14. {spforge-0.8.4 → spforge-0.8.5}/README.md +0 -0
  15. {spforge-0.8.4 → spforge-0.8.5}/examples/__init__.py +0 -0
  16. {spforge-0.8.4 → spforge-0.8.5}/examples/game_level_example.py +0 -0
  17. {spforge-0.8.4 → spforge-0.8.5}/examples/lol/__init__.py +0 -0
  18. {spforge-0.8.4 → spforge-0.8.5}/examples/lol/data/__init__.py +0 -0
  19. {spforge-0.8.4 → spforge-0.8.5}/examples/lol/data/subsample_lol_data.parquet +0 -0
  20. {spforge-0.8.4 → spforge-0.8.5}/examples/lol/data/utils.py +0 -0
  21. {spforge-0.8.4 → spforge-0.8.5}/examples/lol/pipeline_transformer_example.py +0 -0
  22. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/__init__.py +0 -0
  23. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/cross_validation_example.py +0 -0
  24. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/data/__init__.py +0 -0
  25. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/data/game_player_subsample.parquet +0 -0
  26. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/data/utils.py +0 -0
  27. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/feature_engineering_example.py +0 -0
  28. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/game_winner_example.py +0 -0
  29. {spforge-0.8.4 → spforge-0.8.5}/examples/nba/predictor_transformers_example.py +0 -0
  30. {spforge-0.8.4 → spforge-0.8.5}/setup.cfg +0 -0
  31. {spforge-0.8.4 → spforge-0.8.5}/spforge/autopipeline.py +0 -0
  32. {spforge-0.8.4 → spforge-0.8.5}/spforge/base_feature_generator.py +0 -0
  33. {spforge-0.8.4 → spforge-0.8.5}/spforge/cross_validator/__init__.py +0 -0
  34. {spforge-0.8.4 → spforge-0.8.5}/spforge/cross_validator/_base.py +0 -0
  35. {spforge-0.8.4 → spforge-0.8.5}/spforge/cross_validator/cross_validator.py +0 -0
  36. {spforge-0.8.4 → spforge-0.8.5}/spforge/data_structures.py +0 -0
  37. {spforge-0.8.4 → spforge-0.8.5}/spforge/distributions/__init__.py +0 -0
  38. {spforge-0.8.4 → spforge-0.8.5}/spforge/distributions/_negative_binomial_estimator.py +0 -0
  39. {spforge-0.8.4 → spforge-0.8.5}/spforge/distributions/_normal_distribution_predictor.py +0 -0
  40. {spforge-0.8.4 → spforge-0.8.5}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
  41. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/__init__.py +0 -0
  42. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_conditional_estimator.py +0 -0
  43. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
  44. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_granularity_estimator.py +0 -0
  45. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_group_by_estimator.py +0 -0
  46. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_ordinal_classifier.py +0 -0
  47. {spforge-0.8.4 → spforge-0.8.5}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
  48. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/__init__.py +0 -0
  49. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_base.py +0 -0
  50. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_lag.py +0 -0
  51. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_net_over_predicted.py +0 -0
  52. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
  53. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
  54. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
  55. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_rolling_mean_days.py +0 -0
  56. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_rolling_window.py +0 -0
  57. {spforge-0.8.4 → spforge-0.8.5}/spforge/feature_generator/_utils.py +0 -0
  58. {spforge-0.8.4 → spforge-0.8.5}/spforge/features_generator_pipeline.py +0 -0
  59. {spforge-0.8.4 → spforge-0.8.5}/spforge/performance_transformers/__init__.py +0 -0
  60. {spforge-0.8.4 → spforge-0.8.5}/spforge/performance_transformers/_performance_manager.py +0 -0
  61. {spforge-0.8.4 → spforge-0.8.5}/spforge/performance_transformers/_performances_transformers.py +0 -0
  62. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/__init__.py +0 -0
  63. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/_base.py +0 -0
  64. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/_player_rating.py +0 -0
  65. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/_team_rating.py +0 -0
  66. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/enums.py +0 -0
  67. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/league_identifier.py +0 -0
  68. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/player_performance_predictor.py +0 -0
  69. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/start_rating_generator.py +0 -0
  70. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/team_performance_predictor.py +0 -0
  71. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/team_start_rating_generator.py +0 -0
  72. {spforge-0.8.4 → spforge-0.8.5}/spforge/ratings/utils.py +0 -0
  73. {spforge-0.8.4 → spforge-0.8.5}/spforge/scorer/__init__.py +0 -0
  74. {spforge-0.8.4 → spforge-0.8.5}/spforge/scorer/_score.py +0 -0
  75. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/__init__.py +0 -0
  76. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_base.py +0 -0
  77. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_net_over_predicted.py +0 -0
  78. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_operator.py +0 -0
  79. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_other_transformer.py +0 -0
  80. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_predictor.py +0 -0
  81. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_simple_transformer.py +0 -0
  82. {spforge-0.8.4 → spforge-0.8.5}/spforge/transformers/_team_ratio_predictor.py +0 -0
  83. {spforge-0.8.4 → spforge-0.8.5}/spforge/utils.py +0 -0
  84. {spforge-0.8.4 → spforge-0.8.5}/spforge.egg-info/dependency_links.txt +0 -0
  85. {spforge-0.8.4 → spforge-0.8.5}/spforge.egg-info/top_level.txt +0 -0
  86. {spforge-0.8.4 → spforge-0.8.5}/tests/cross_validator/test_cross_validator.py +0 -0
  87. {spforge-0.8.4 → spforge-0.8.5}/tests/distributions/test_distribution.py +0 -0
  88. {spforge-0.8.4 → spforge-0.8.5}/tests/end_to_end/test_lol_player_kills.py +0 -0
  89. {spforge-0.8.4 → spforge-0.8.5}/tests/end_to_end/test_nba_player_points.py +0 -0
  90. {spforge-0.8.4 → spforge-0.8.5}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
  91. {spforge-0.8.4 → spforge-0.8.5}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
  92. {spforge-0.8.4 → spforge-0.8.5}/tests/estimator/test_sklearn_estimator.py +0 -0
  93. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_lag.py +0 -0
  94. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
  95. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
  96. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
  97. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_rolling_mean_days.py +0 -0
  98. {spforge-0.8.4 → spforge-0.8.5}/tests/feature_generator/test_rolling_window.py +0 -0
  99. {spforge-0.8.4 → spforge-0.8.5}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
  100. {spforge-0.8.4 → spforge-0.8.5}/tests/performance_transformers/test_performance_manager.py +0 -0
  101. {spforge-0.8.4 → spforge-0.8.5}/tests/performance_transformers/test_performances_transformers.py +0 -0
  102. {spforge-0.8.4 → spforge-0.8.5}/tests/ratings/test_player_rating_generator.py +0 -0
  103. {spforge-0.8.4 → spforge-0.8.5}/tests/ratings/test_ratings_property.py +0 -0
  104. {spforge-0.8.4 → spforge-0.8.5}/tests/ratings/test_team_rating_generator.py +0 -0
  105. {spforge-0.8.4 → spforge-0.8.5}/tests/scorer/test_score.py +0 -0
  106. {spforge-0.8.4 → spforge-0.8.5}/tests/scorer/test_score_aggregation_granularity.py +0 -0
  107. {spforge-0.8.4 → spforge-0.8.5}/tests/test_autopipeline.py +0 -0
  108. {spforge-0.8.4 → spforge-0.8.5}/tests/test_autopipeline_context.py +0 -0
  109. {spforge-0.8.4 → spforge-0.8.5}/tests/test_feature_generator_pipeline.py +0 -0
  110. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_estimator_transformer_context.py +0 -0
  111. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_net_over_predicted.py +0 -0
  112. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_other_transformer.py +0 -0
  113. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_predictor_transformer.py +0 -0
  114. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_simple_transformer.py +0 -0
  115. {spforge-0.8.4 → spforge-0.8.5}/tests/transformers/test_team_ratio_predictor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.4
3
+ Version: 0.8.5
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: numpy>=1.23.4
19
19
  Requires-Dist: optuna>=3.4.0
20
- Requires-Dist: pandas>=2.0.0
20
+ Requires-Dist: pandas<3.0.0,>=2.0.0
21
21
  Requires-Dist: pendulum>=1.0.0
22
22
  Requires-Dist: scikit-learn>=1.4.0
23
23
  Requires-Dist: lightgbm>=4.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spforge"
7
- version = "0.8.4"
7
+ version = "0.8.5"
8
8
  description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -23,7 +23,7 @@ classifiers = [
23
23
  dependencies = [
24
24
  "numpy>=1.23.4",
25
25
  "optuna>=3.4.0",
26
- "pandas>=2.0.0",
26
+ "pandas>=2.0.0,<3.0.0",
27
27
  "pendulum>=1.0.0",
28
28
  "scikit-learn >=1.4.0",
29
29
  "lightgbm>=4.0.0",
@@ -2,6 +2,7 @@ from .autopipeline import AutoPipeline as AutoPipeline
2
2
  from .data_structures import ColumnNames as ColumnNames, GameColumnNames as GameColumnNames
3
3
  from .features_generator_pipeline import FeatureGeneratorPipeline as FeatureGeneratorPipeline
4
4
  from .hyperparameter_tuning import (
5
+ EstimatorHyperparameterTuner as EstimatorHyperparameterTuner,
5
6
  OptunaResult as OptunaResult,
6
7
  ParamSpec as ParamSpec,
7
8
  RatingHyperparameterTuner as RatingHyperparameterTuner,
@@ -1,9 +1,15 @@
1
1
  from spforge.hyperparameter_tuning._default_search_spaces import (
2
+ get_default_estimator_search_space,
3
+ get_default_lgbm_search_space,
4
+ get_default_negative_binomial_search_space,
5
+ get_default_normal_distribution_search_space,
2
6
  get_default_player_rating_search_space,
3
7
  get_default_search_space,
8
+ get_default_student_t_search_space,
4
9
  get_default_team_rating_search_space,
5
10
  )
6
11
  from spforge.hyperparameter_tuning._tuner import (
12
+ EstimatorHyperparameterTuner,
7
13
  OptunaResult,
8
14
  ParamSpec,
9
15
  RatingHyperparameterTuner,
@@ -11,9 +17,15 @@ from spforge.hyperparameter_tuning._tuner import (
11
17
 
12
18
  __all__ = [
13
19
  "RatingHyperparameterTuner",
20
+ "EstimatorHyperparameterTuner",
14
21
  "ParamSpec",
15
22
  "OptunaResult",
23
+ "get_default_estimator_search_space",
24
+ "get_default_lgbm_search_space",
25
+ "get_default_negative_binomial_search_space",
26
+ "get_default_normal_distribution_search_space",
16
27
  "get_default_player_rating_search_space",
17
28
  "get_default_team_rating_search_space",
29
+ "get_default_student_t_search_space",
18
30
  "get_default_search_space",
19
31
  ]
@@ -1,5 +1,126 @@
1
1
  from spforge.hyperparameter_tuning._tuner import ParamSpec
2
2
  from spforge.ratings import PlayerRatingGenerator, TeamRatingGenerator
3
+ from spforge.distributions import (
4
+ NegativeBinomialEstimator,
5
+ NormalDistributionPredictor,
6
+ StudentTDistributionEstimator,
7
+ )
8
+
9
+
10
+ def _is_lightgbm_estimator(obj: object) -> bool:
11
+ mod = (getattr(type(obj), "__module__", "") or "").lower()
12
+ name = type(obj).__name__
13
+ if "lightgbm" in mod:
14
+ return True
15
+ return bool(name.startswith("LGBM"))
16
+
17
+
18
+ def get_default_lgbm_search_space() -> dict[str, ParamSpec]:
19
+ return {
20
+ "n_estimators": ParamSpec(
21
+ param_type="int",
22
+ low=50,
23
+ high=800,
24
+ log=True,
25
+ ),
26
+ "num_leaves": ParamSpec(
27
+ param_type="int",
28
+ low=16,
29
+ high=256,
30
+ log=True,
31
+ ),
32
+ "max_depth": ParamSpec(
33
+ param_type="int",
34
+ low=3,
35
+ high=12,
36
+ ),
37
+ "min_child_samples": ParamSpec(
38
+ param_type="int",
39
+ low=10,
40
+ high=200,
41
+ log=True,
42
+ ),
43
+ "subsample": ParamSpec(
44
+ param_type="float",
45
+ low=0.6,
46
+ high=1.0,
47
+ ),
48
+ "subsample_freq": ParamSpec(
49
+ param_type="int",
50
+ low=1,
51
+ high=7,
52
+ ),
53
+ "reg_alpha": ParamSpec(
54
+ param_type="float",
55
+ low=1e-8,
56
+ high=10.0,
57
+ log=True,
58
+ ),
59
+ "reg_lambda": ParamSpec(
60
+ param_type="float",
61
+ low=1e-8,
62
+ high=10.0,
63
+ log=True,
64
+ ),
65
+ }
66
+
67
+
68
+ def get_default_negative_binomial_search_space() -> dict[str, ParamSpec]:
69
+ return {
70
+ "predicted_r_weight": ParamSpec(
71
+ param_type="float",
72
+ low=0.0,
73
+ high=1.0,
74
+ ),
75
+ "r_rolling_mean_window": ParamSpec(
76
+ param_type="int",
77
+ low=10,
78
+ high=120,
79
+ ),
80
+ "predicted_r_iterations": ParamSpec(
81
+ param_type="int",
82
+ low=2,
83
+ high=12,
84
+ ),
85
+ }
86
+
87
+
88
+ def get_default_normal_distribution_search_space() -> dict[str, ParamSpec]:
89
+ return {
90
+ "sigma": ParamSpec(
91
+ param_type="float",
92
+ low=0.5,
93
+ high=30.0,
94
+ log=True,
95
+ ),
96
+ }
97
+
98
+
99
+ def get_default_student_t_search_space() -> dict[str, ParamSpec]:
100
+ return {
101
+ "df": ParamSpec(
102
+ param_type="float",
103
+ low=3.0,
104
+ high=30.0,
105
+ log=True,
106
+ ),
107
+ "min_sigma": ParamSpec(
108
+ param_type="float",
109
+ low=0.5,
110
+ high=10.0,
111
+ log=True,
112
+ ),
113
+ "sigma_bins": ParamSpec(
114
+ param_type="int",
115
+ low=4,
116
+ high=12,
117
+ ),
118
+ "min_bin_rows": ParamSpec(
119
+ param_type="int",
120
+ low=10,
121
+ high=100,
122
+ ),
123
+ }
3
124
 
4
125
 
5
126
  def get_default_player_rating_search_space() -> dict[str, ParamSpec]:
@@ -120,3 +241,15 @@ def get_default_search_space(
120
241
  f"Unsupported rating generator type: {type(rating_generator)}. "
121
242
  "Expected PlayerRatingGenerator or TeamRatingGenerator."
122
243
  )
244
+
245
+
246
+ def get_default_estimator_search_space(estimator: object) -> dict[str, ParamSpec]:
247
+ if _is_lightgbm_estimator(estimator):
248
+ return get_default_lgbm_search_space()
249
+ if isinstance(estimator, NegativeBinomialEstimator):
250
+ return get_default_negative_binomial_search_space()
251
+ if isinstance(estimator, NormalDistributionPredictor):
252
+ return get_default_normal_distribution_search_space()
253
+ if isinstance(estimator, StudentTDistributionEstimator):
254
+ return get_default_student_t_search_space()
255
+ return {}
@@ -45,6 +45,8 @@ class ParamSpec:
45
45
  elif self.param_type == "int":
46
46
  if self.low is None or self.high is None:
47
47
  raise ValueError(f"int parameter '{name}' requires low and high bounds")
48
+ if self.step is None:
49
+ return trial.suggest_int(name, int(self.low), int(self.high))
48
50
  return trial.suggest_int(name, int(self.low), int(self.high), step=self.step)
49
51
  elif self.param_type == "categorical":
50
52
  if self.choices is None:
@@ -272,3 +274,193 @@ class RatingHyperparameterTuner:
272
274
  raise ValueError("Scorer returned invalid values in dict")
273
275
  return float(np.mean(values))
274
276
  return float(score)
277
+
278
+
279
+ def _is_estimator(obj: object) -> bool:
280
+ return hasattr(obj, "get_params") and hasattr(obj, "set_params")
281
+
282
+
283
+ def _get_leaf_estimator_paths(estimator: Any) -> dict[str, Any]:
284
+ if not _is_estimator(estimator):
285
+ raise ValueError("estimator must implement get_params and set_params")
286
+
287
+ params = estimator.get_params(deep=True)
288
+ estimator_keys = [k for k, v in params.items() if _is_estimator(v)]
289
+
290
+ if not estimator_keys:
291
+ return {"": estimator}
292
+
293
+ leaves: list[str] = []
294
+ for key in estimator_keys:
295
+ if not any(other != key and other.startswith(f"{key}__") for other in estimator_keys):
296
+ leaves.append(key)
297
+
298
+ return {key: params[key] for key in sorted(leaves)}
299
+
300
+
301
+ def _build_search_space_for_targets(
302
+ targets: dict[str, dict[str, ParamSpec]],
303
+ ) -> dict[str, ParamSpec]:
304
+ search_space: dict[str, ParamSpec] = {}
305
+ for path, params in targets.items():
306
+ for param_name, param_spec in params.items():
307
+ full_name = f"{path}__{param_name}" if path else param_name
308
+ if full_name in search_space:
309
+ raise ValueError(f"Duplicate parameter name detected: {full_name}")
310
+ search_space[full_name] = param_spec
311
+ return search_space
312
+
313
+
314
+ def _enqueue_predicted_r_weight_zero(study: optuna.Study, search_space: dict[str, ParamSpec]):
315
+ zero_params: dict[str, float] = {}
316
+ for name, spec in search_space.items():
317
+ if not name.endswith("predicted_r_weight"):
318
+ continue
319
+ if spec.param_type not in {"float", "int"}:
320
+ continue
321
+ if spec.low is None or spec.high is None:
322
+ continue
323
+ if spec.low <= 0 <= spec.high:
324
+ zero_params[name] = 0.0
325
+
326
+ if zero_params:
327
+ study.enqueue_trial(zero_params)
328
+
329
+
330
+ class EstimatorHyperparameterTuner:
331
+ """
332
+ Hyperparameter tuner for sklearn-compatible estimators.
333
+
334
+ Supports nested estimators and can target deepest leaf estimators.
335
+ """
336
+
337
+ def __init__(
338
+ self,
339
+ estimator: Any,
340
+ cross_validator: MatchKFoldCrossValidator,
341
+ scorer: BaseScorer,
342
+ direction: Literal["minimize", "maximize"],
343
+ param_search_space: dict[str, ParamSpec] | None = None,
344
+ param_targets: dict[str, dict[str, ParamSpec]] | None = None,
345
+ n_trials: int = 50,
346
+ n_jobs: int = 1,
347
+ storage: str | None = None,
348
+ study_name: str | None = None,
349
+ timeout: float | None = None,
350
+ show_progress_bar: bool = True,
351
+ sampler: optuna.samplers.BaseSampler | None = None,
352
+ pruner: optuna.pruners.BasePruner | None = None,
353
+ ):
354
+ self.estimator = estimator
355
+ self.cross_validator = cross_validator
356
+ self.scorer = scorer
357
+ self.direction = direction
358
+ self.param_search_space = param_search_space
359
+ self.param_targets = param_targets
360
+ self.n_trials = n_trials
361
+ self.n_jobs = n_jobs
362
+ self.storage = storage
363
+ self.study_name = study_name
364
+ self.timeout = timeout
365
+ self.show_progress_bar = show_progress_bar
366
+ self.sampler = sampler
367
+ self.pruner = pruner
368
+
369
+ if direction not in ["minimize", "maximize"]:
370
+ raise ValueError(f"direction must be 'minimize' or 'maximize', got: {direction}")
371
+
372
+ if storage is not None and study_name is None:
373
+ raise ValueError("study_name is required when using storage")
374
+
375
+ if param_search_space is not None and param_targets is not None:
376
+ raise ValueError("param_search_space and param_targets cannot both be provided")
377
+
378
+ def optimize(self, df: IntoFrameT) -> OptunaResult:
379
+ from spforge.hyperparameter_tuning._default_search_spaces import (
380
+ get_default_estimator_search_space,
381
+ )
382
+
383
+ leaf_estimators = _get_leaf_estimator_paths(self.estimator)
384
+ default_targets = {
385
+ path: get_default_estimator_search_space(est)
386
+ for path, est in leaf_estimators.items()
387
+ }
388
+ default_targets = {path: space for path, space in default_targets.items() if space}
389
+
390
+ if self.param_targets is not None:
391
+ unknown = set(self.param_targets) - set(leaf_estimators)
392
+ if unknown:
393
+ raise ValueError(f"param_targets contains unknown estimator paths: {unknown}")
394
+ targets = self.param_targets
395
+ elif self.param_search_space is not None:
396
+ targets = {path: self.param_search_space for path in leaf_estimators}
397
+ elif default_targets:
398
+ targets = default_targets
399
+ else:
400
+ raise ValueError(
401
+ "param_search_space is required when no default search space is available"
402
+ )
403
+
404
+ search_space = _build_search_space_for_targets(targets)
405
+ if not search_space:
406
+ raise ValueError("Resolved search space is empty")
407
+
408
+ study = optuna.create_study(
409
+ direction=self.direction,
410
+ sampler=self.sampler,
411
+ pruner=self.pruner,
412
+ storage=self.storage,
413
+ study_name=self.study_name,
414
+ load_if_exists=True if self.storage else False,
415
+ )
416
+
417
+ _enqueue_predicted_r_weight_zero(study, search_space)
418
+
419
+ study.optimize(
420
+ lambda trial: self._objective(trial, df, search_space),
421
+ n_trials=self.n_trials,
422
+ n_jobs=self.n_jobs,
423
+ timeout=self.timeout,
424
+ show_progress_bar=self.show_progress_bar,
425
+ )
426
+
427
+ return OptunaResult(
428
+ best_params=study.best_params,
429
+ best_value=study.best_value,
430
+ best_trial=study.best_trial,
431
+ study=study,
432
+ )
433
+
434
+ def _objective(
435
+ self, trial: optuna.Trial, df: IntoFrameT, search_space: dict[str, ParamSpec]
436
+ ) -> float:
437
+ try:
438
+ trial_params = self._suggest_params(trial, search_space)
439
+
440
+ copied_estimator = copy.deepcopy(self.estimator)
441
+ copied_estimator.set_params(**trial_params)
442
+
443
+ cv = copy.deepcopy(self.cross_validator)
444
+ cv.estimator = copied_estimator
445
+
446
+ validation_df = cv.generate_validation_df(df)
447
+ score = self.scorer.score(validation_df)
448
+ score_value = RatingHyperparameterTuner._aggregate_score(score)
449
+
450
+ if math.isnan(score_value) or math.isinf(score_value):
451
+ logger.warning(f"Trial {trial.number} returned invalid score: {score_value}")
452
+ return float("inf") if self.direction == "minimize" else float("-inf")
453
+
454
+ return score_value
455
+
456
+ except Exception as e:
457
+ logger.warning(f"Trial {trial.number} failed with error: {e}")
458
+ return float("inf") if self.direction == "minimize" else float("-inf")
459
+
460
+ def _suggest_params(
461
+ self, trial: optuna.Trial, search_space: dict[str, ParamSpec]
462
+ ) -> dict[str, Any]:
463
+ params: dict[str, Any] = {}
464
+ for param_name, param_spec in search_space.items():
465
+ params[param_name] = param_spec.suggest(trial, param_name)
466
+ return params
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.4
3
+ Version: 0.8.5
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: numpy>=1.23.4
19
19
  Requires-Dist: optuna>=3.4.0
20
- Requires-Dist: pandas>=2.0.0
20
+ Requires-Dist: pandas<3.0.0,>=2.0.0
21
21
  Requires-Dist: pendulum>=1.0.0
22
22
  Requires-Dist: scikit-learn>=1.4.0
23
23
  Requires-Dist: lightgbm>=4.0.0
@@ -84,6 +84,7 @@ tests/test_autopipeline_context.py
84
84
  tests/test_feature_generator_pipeline.py
85
85
  tests/cross_validator/test_cross_validator.py
86
86
  tests/distributions/test_distribution.py
87
+ tests/end_to_end/test_estimator_hyperparameter_tuning.py
87
88
  tests/end_to_end/test_lol_player_kills.py
88
89
  tests/end_to_end/test_nba_player_points.py
89
90
  tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py
@@ -95,6 +96,7 @@ tests/feature_generator/test_rolling_against_opponent.py
95
96
  tests/feature_generator/test_rolling_mean_binary.py
96
97
  tests/feature_generator/test_rolling_mean_days.py
97
98
  tests/feature_generator/test_rolling_window.py
99
+ tests/hyperparameter_tuning/test_estimator_tuner.py
98
100
  tests/hyperparameter_tuning/test_rating_tuner.py
99
101
  tests/performance_transformers/test_performance_manager.py
100
102
  tests/performance_transformers/test_performances_transformers.py
@@ -1,6 +1,6 @@
1
1
  numpy>=1.23.4
2
2
  optuna>=3.4.0
3
- pandas>=2.0.0
3
+ pandas<3.0.0,>=2.0.0
4
4
  pendulum>=1.0.0
5
5
  scikit-learn>=1.4.0
6
6
  lightgbm>=4.0.0
@@ -0,0 +1,85 @@
1
+ import polars as pl
2
+ from sklearn.linear_model import LogisticRegression
3
+ from sklearn.metrics import mean_absolute_error
4
+
5
+ from examples import get_sub_sample_nba_data
6
+ from spforge import AutoPipeline, ColumnNames, EstimatorHyperparameterTuner, ParamSpec
7
+ from spforge.cross_validator import MatchKFoldCrossValidator
8
+ from spforge.scorer import SklearnScorer
9
+
10
+
11
+ def test_nba_estimator_hyperparameter_tuning__workflow_completes():
12
+ df = get_sub_sample_nba_data(as_polars=True, as_pandas=False)
13
+ column_names = ColumnNames(
14
+ team_id="team_id",
15
+ match_id="game_id",
16
+ start_date="start_date",
17
+ player_id="player_id",
18
+ participation_weight="minutes_ratio",
19
+ )
20
+
21
+ df = df.sort(
22
+ [
23
+ column_names.start_date,
24
+ column_names.match_id,
25
+ column_names.team_id,
26
+ column_names.player_id,
27
+ ]
28
+ )
29
+
30
+ df = df.with_columns(
31
+ [
32
+ (pl.col("minutes") / pl.col("minutes").sum().over("game_id")).alias(
33
+ "minutes_ratio"
34
+ ),
35
+ (pl.col("points") > pl.lit(10)).cast(pl.Int64).alias("points_over_10"),
36
+ ]
37
+ )
38
+
39
+ estimator = AutoPipeline(
40
+ estimator=LogisticRegression(max_iter=200),
41
+ estimator_features=["minutes", "minutes_ratio"],
42
+ )
43
+
44
+ cv = MatchKFoldCrossValidator(
45
+ match_id_column_name=column_names.match_id,
46
+ date_column_name=column_names.start_date,
47
+ target_column="points_over_10",
48
+ estimator=estimator,
49
+ prediction_column_name="points_pred",
50
+ n_splits=2,
51
+ features=estimator.required_features,
52
+ )
53
+
54
+ scorer = SklearnScorer(
55
+ scorer_function=mean_absolute_error,
56
+ pred_column="points_pred",
57
+ target="points_over_10",
58
+ validation_column="is_validation",
59
+ )
60
+
61
+ tuner = EstimatorHyperparameterTuner(
62
+ estimator=estimator,
63
+ cross_validator=cv,
64
+ scorer=scorer,
65
+ direction="minimize",
66
+ param_search_space={
67
+ "C": ParamSpec(
68
+ param_type="float",
69
+ low=0.1,
70
+ high=2.0,
71
+ log=True,
72
+ ),
73
+ },
74
+ n_trials=3,
75
+ show_progress_bar=False,
76
+ )
77
+
78
+ result = tuner.optimize(df)
79
+
80
+ assert result.best_params is not None
81
+ assert isinstance(result.best_params, dict)
82
+ assert "estimator__C" in result.best_params
83
+ assert isinstance(result.best_value, float)
84
+ assert result.best_trial is not None
85
+ assert result.study is not None
@@ -0,0 +1,167 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import pytest
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.linear_model import LogisticRegression
6
+
7
+ from spforge import EstimatorHyperparameterTuner, ParamSpec
8
+ from spforge.cross_validator import MatchKFoldCrossValidator
9
+ from spforge.estimator import SkLearnEnhancerEstimator
10
+ from spforge.scorer import MeanBiasScorer
11
+
12
+
13
+ class FakeLGBMClassifier(BaseEstimator):
14
+ __module__ = "lightgbm.sklearn"
15
+
16
+ def __init__(
17
+ self,
18
+ n_estimators: int = 100,
19
+ num_leaves: int = 31,
20
+ max_depth: int = 5,
21
+ min_child_samples: int = 20,
22
+ subsample: float = 1.0,
23
+ subsample_freq: int = 1,
24
+ reg_alpha: float = 0.0,
25
+ reg_lambda: float = 0.0,
26
+ ):
27
+ self.n_estimators = n_estimators
28
+ self.num_leaves = num_leaves
29
+ self.max_depth = max_depth
30
+ self.min_child_samples = min_child_samples
31
+ self.subsample = subsample
32
+ self.subsample_freq = subsample_freq
33
+ self.reg_alpha = reg_alpha
34
+ self.reg_lambda = reg_lambda
35
+
36
+ def fit(self, X, y):
37
+ self.classes_ = np.unique(y)
38
+ return self
39
+
40
+ def predict_proba(self, X):
41
+ n = len(X)
42
+ if len(self.classes_) < 2:
43
+ return np.ones((n, 1))
44
+ return np.tile([0.4, 0.6], (n, 1))
45
+
46
+ def predict(self, X):
47
+ n = len(X)
48
+ if len(self.classes_) == 1:
49
+ return np.full(n, self.classes_[0])
50
+ proba = self.predict_proba(X)
51
+ idx = np.argmax(proba, axis=1)
52
+ return np.array(self.classes_)[idx]
53
+
54
+
55
+ @pytest.fixture
56
+ def sample_df():
57
+ dates = pd.date_range("2024-01-01", periods=12, freq="D")
58
+ rows = []
59
+ for i, date in enumerate(dates):
60
+ rows.append(
61
+ {
62
+ "mid": f"M{i // 2}",
63
+ "date": date,
64
+ "x1": float(i),
65
+ "y": 1 if i % 2 == 0 else 0,
66
+ }
67
+ )
68
+ return pd.DataFrame(rows)
69
+
70
+
71
+ @pytest.fixture
72
+ def scorer():
73
+ return MeanBiasScorer(
74
+ pred_column="y_pred",
75
+ target="y",
76
+ validation_column="is_validation",
77
+ )
78
+
79
+
80
+ def test_estimator_tuner_requires_search_space(sample_df, scorer):
81
+ estimator = LogisticRegression()
82
+
83
+ cv = MatchKFoldCrossValidator(
84
+ match_id_column_name="mid",
85
+ date_column_name="date",
86
+ target_column="y",
87
+ estimator=estimator,
88
+ prediction_column_name="y_pred",
89
+ n_splits=2,
90
+ features=["x1"],
91
+ )
92
+
93
+ tuner = EstimatorHyperparameterTuner(
94
+ estimator=estimator,
95
+ cross_validator=cv,
96
+ scorer=scorer,
97
+ direction="minimize",
98
+ n_trials=2,
99
+ show_progress_bar=False,
100
+ )
101
+
102
+ with pytest.raises(ValueError, match="param_search_space is required"):
103
+ tuner.optimize(sample_df)
104
+
105
+
106
+ def test_estimator_tuner_custom_search_space(sample_df, scorer):
107
+ estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
108
+
109
+ cv = MatchKFoldCrossValidator(
110
+ match_id_column_name="mid",
111
+ date_column_name="date",
112
+ target_column="y",
113
+ estimator=estimator,
114
+ prediction_column_name="y_pred",
115
+ n_splits=2,
116
+ features=["x1"],
117
+ )
118
+
119
+ tuner = EstimatorHyperparameterTuner(
120
+ estimator=estimator,
121
+ cross_validator=cv,
122
+ scorer=scorer,
123
+ direction="minimize",
124
+ param_search_space={
125
+ "C": ParamSpec(
126
+ param_type="float",
127
+ low=0.1,
128
+ high=2.0,
129
+ log=True,
130
+ )
131
+ },
132
+ n_trials=2,
133
+ show_progress_bar=False,
134
+ )
135
+
136
+ result = tuner.optimize(sample_df)
137
+
138
+ assert "estimator__C" in result.best_params
139
+ assert isinstance(result.best_value, float)
140
+
141
+
142
+ def test_estimator_tuner_lgbm_defaults(sample_df, scorer):
143
+ estimator = FakeLGBMClassifier()
144
+
145
+ cv = MatchKFoldCrossValidator(
146
+ match_id_column_name="mid",
147
+ date_column_name="date",
148
+ target_column="y",
149
+ estimator=estimator,
150
+ prediction_column_name="y_pred",
151
+ n_splits=2,
152
+ features=["x1"],
153
+ )
154
+
155
+ tuner = EstimatorHyperparameterTuner(
156
+ estimator=estimator,
157
+ cross_validator=cv,
158
+ scorer=scorer,
159
+ direction="minimize",
160
+ n_trials=2,
161
+ show_progress_bar=False,
162
+ )
163
+
164
+ result = tuner.optimize(sample_df)
165
+
166
+ assert "n_estimators" in result.best_params
167
+ assert isinstance(result.best_value, float)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes