spforge 0.8.11__tar.gz → 0.8.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

Files changed (119)
  1. {spforge-0.8.11/spforge.egg-info → spforge-0.8.14}/PKG-INFO +1 -1
  2. {spforge-0.8.11 → spforge-0.8.14}/pyproject.toml +1 -1
  3. {spforge-0.8.11 → spforge-0.8.14}/spforge/autopipeline.py +138 -0
  4. {spforge-0.8.11 → spforge-0.8.14/spforge.egg-info}/PKG-INFO +1 -1
  5. {spforge-0.8.11 → spforge-0.8.14}/tests/test_autopipeline.py +172 -0
  6. {spforge-0.8.11 → spforge-0.8.14}/LICENSE +0 -0
  7. {spforge-0.8.11 → spforge-0.8.14}/MANIFEST.in +0 -0
  8. {spforge-0.8.11 → spforge-0.8.14}/README.md +0 -0
  9. {spforge-0.8.11 → spforge-0.8.14}/examples/__init__.py +0 -0
  10. {spforge-0.8.11 → spforge-0.8.14}/examples/game_level_example.py +0 -0
  11. {spforge-0.8.11 → spforge-0.8.14}/examples/lol/__init__.py +0 -0
  12. {spforge-0.8.11 → spforge-0.8.14}/examples/lol/data/__init__.py +0 -0
  13. {spforge-0.8.11 → spforge-0.8.14}/examples/lol/data/subsample_lol_data.parquet +0 -0
  14. {spforge-0.8.11 → spforge-0.8.14}/examples/lol/data/utils.py +0 -0
  15. {spforge-0.8.11 → spforge-0.8.14}/examples/lol/pipeline_transformer_example.py +0 -0
  16. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/__init__.py +0 -0
  17. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/cross_validation_example.py +0 -0
  18. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/data/__init__.py +0 -0
  19. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/data/game_player_subsample.parquet +0 -0
  20. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/data/utils.py +0 -0
  21. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/feature_engineering_example.py +0 -0
  22. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/game_winner_example.py +0 -0
  23. {spforge-0.8.11 → spforge-0.8.14}/examples/nba/predictor_transformers_example.py +0 -0
  24. {spforge-0.8.11 → spforge-0.8.14}/setup.cfg +0 -0
  25. {spforge-0.8.11 → spforge-0.8.14}/spforge/__init__.py +0 -0
  26. {spforge-0.8.11 → spforge-0.8.14}/spforge/base_feature_generator.py +0 -0
  27. {spforge-0.8.11 → spforge-0.8.14}/spforge/cross_validator/__init__.py +0 -0
  28. {spforge-0.8.11 → spforge-0.8.14}/spforge/cross_validator/_base.py +0 -0
  29. {spforge-0.8.11 → spforge-0.8.14}/spforge/cross_validator/cross_validator.py +0 -0
  30. {spforge-0.8.11 → spforge-0.8.14}/spforge/data_structures.py +0 -0
  31. {spforge-0.8.11 → spforge-0.8.14}/spforge/distributions/__init__.py +0 -0
  32. {spforge-0.8.11 → spforge-0.8.14}/spforge/distributions/_negative_binomial_estimator.py +0 -0
  33. {spforge-0.8.11 → spforge-0.8.14}/spforge/distributions/_normal_distribution_predictor.py +0 -0
  34. {spforge-0.8.11 → spforge-0.8.14}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
  35. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/__init__.py +0 -0
  36. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_conditional_estimator.py +0 -0
  37. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
  38. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_granularity_estimator.py +0 -0
  39. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_group_by_estimator.py +0 -0
  40. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_ordinal_classifier.py +0 -0
  41. {spforge-0.8.11 → spforge-0.8.14}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
  42. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/__init__.py +0 -0
  43. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_base.py +0 -0
  44. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_lag.py +0 -0
  45. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_net_over_predicted.py +0 -0
  46. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
  47. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
  48. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
  49. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_rolling_mean_days.py +0 -0
  50. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_rolling_window.py +0 -0
  51. {spforge-0.8.11 → spforge-0.8.14}/spforge/feature_generator/_utils.py +0 -0
  52. {spforge-0.8.11 → spforge-0.8.14}/spforge/features_generator_pipeline.py +0 -0
  53. {spforge-0.8.11 → spforge-0.8.14}/spforge/hyperparameter_tuning/__init__.py +0 -0
  54. {spforge-0.8.11 → spforge-0.8.14}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
  55. {spforge-0.8.11 → spforge-0.8.14}/spforge/hyperparameter_tuning/_tuner.py +0 -0
  56. {spforge-0.8.11 → spforge-0.8.14}/spforge/performance_transformers/__init__.py +0 -0
  57. {spforge-0.8.11 → spforge-0.8.14}/spforge/performance_transformers/_performance_manager.py +0 -0
  58. {spforge-0.8.11 → spforge-0.8.14}/spforge/performance_transformers/_performances_transformers.py +0 -0
  59. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/__init__.py +0 -0
  60. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/_base.py +0 -0
  61. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/_player_rating.py +0 -0
  62. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/_team_rating.py +0 -0
  63. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/enums.py +0 -0
  64. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/league_identifier.py +0 -0
  65. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/league_start_rating_optimizer.py +0 -0
  66. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/player_performance_predictor.py +0 -0
  67. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/start_rating_generator.py +0 -0
  68. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/team_performance_predictor.py +0 -0
  69. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/team_start_rating_generator.py +0 -0
  70. {spforge-0.8.11 → spforge-0.8.14}/spforge/ratings/utils.py +0 -0
  71. {spforge-0.8.11 → spforge-0.8.14}/spforge/scorer/__init__.py +0 -0
  72. {spforge-0.8.11 → spforge-0.8.14}/spforge/scorer/_score.py +0 -0
  73. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/__init__.py +0 -0
  74. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_base.py +0 -0
  75. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_net_over_predicted.py +0 -0
  76. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_operator.py +0 -0
  77. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_other_transformer.py +0 -0
  78. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_predictor.py +0 -0
  79. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_simple_transformer.py +0 -0
  80. {spforge-0.8.11 → spforge-0.8.14}/spforge/transformers/_team_ratio_predictor.py +0 -0
  81. {spforge-0.8.11 → spforge-0.8.14}/spforge/utils.py +0 -0
  82. {spforge-0.8.11 → spforge-0.8.14}/spforge.egg-info/SOURCES.txt +0 -0
  83. {spforge-0.8.11 → spforge-0.8.14}/spforge.egg-info/dependency_links.txt +0 -0
  84. {spforge-0.8.11 → spforge-0.8.14}/spforge.egg-info/requires.txt +0 -0
  85. {spforge-0.8.11 → spforge-0.8.14}/spforge.egg-info/top_level.txt +0 -0
  86. {spforge-0.8.11 → spforge-0.8.14}/tests/cross_validator/test_cross_validator.py +0 -0
  87. {spforge-0.8.11 → spforge-0.8.14}/tests/distributions/test_distribution.py +0 -0
  88. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
  89. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
  90. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_lol_player_kills.py +0 -0
  91. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_nba_player_points.py +0 -0
  92. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
  93. {spforge-0.8.11 → spforge-0.8.14}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
  94. {spforge-0.8.11 → spforge-0.8.14}/tests/estimator/test_sklearn_estimator.py +0 -0
  95. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_lag.py +0 -0
  96. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
  97. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
  98. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
  99. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_rolling_mean_days.py +0 -0
  100. {spforge-0.8.11 → spforge-0.8.14}/tests/feature_generator/test_rolling_window.py +0 -0
  101. {spforge-0.8.11 → spforge-0.8.14}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
  102. {spforge-0.8.11 → spforge-0.8.14}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
  103. {spforge-0.8.11 → spforge-0.8.14}/tests/performance_transformers/test_performance_manager.py +0 -0
  104. {spforge-0.8.11 → spforge-0.8.14}/tests/performance_transformers/test_performances_transformers.py +0 -0
  105. {spforge-0.8.11 → spforge-0.8.14}/tests/ratings/test_player_rating_generator.py +0 -0
  106. {spforge-0.8.11 → spforge-0.8.14}/tests/ratings/test_player_rating_no_mutation.py +0 -0
  107. {spforge-0.8.11 → spforge-0.8.14}/tests/ratings/test_ratings_property.py +0 -0
  108. {spforge-0.8.11 → spforge-0.8.14}/tests/ratings/test_team_rating_generator.py +0 -0
  109. {spforge-0.8.11 → spforge-0.8.14}/tests/ratings/test_utils_scaled_weights.py +0 -0
  110. {spforge-0.8.11 → spforge-0.8.14}/tests/scorer/test_score.py +0 -0
  111. {spforge-0.8.11 → spforge-0.8.14}/tests/scorer/test_score_aggregation_granularity.py +0 -0
  112. {spforge-0.8.11 → spforge-0.8.14}/tests/test_autopipeline_context.py +0 -0
  113. {spforge-0.8.11 → spforge-0.8.14}/tests/test_feature_generator_pipeline.py +0 -0
  114. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_estimator_transformer_context.py +0 -0
  115. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_net_over_predicted.py +0 -0
  116. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_other_transformer.py +0 -0
  117. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_predictor_transformer.py +0 -0
  118. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_simple_transformer.py +0 -0
  119. {spforge-0.8.11 → spforge-0.8.14}/tests/transformers/test_team_ratio_predictor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.11
3
+ Version: 0.8.14
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spforge"
7
- version = "0.8.11"
7
+ version = "0.8.14"
8
8
  description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -195,6 +195,40 @@ def lgbm_in_root(root) -> bool:
195
195
  return any(_is_lightgbm_estimator(obj) for obj in _walk_objects(root))
196
196
 
197
197
 
198
+ def _get_importance_estimator(estimator) -> tuple[Any, str] | None:
199
+ """Recursively find innermost estimator with feature_importances_ or coef_."""
200
+ if hasattr(estimator, "feature_importances_"):
201
+ inner = _get_importance_estimator_inner(estimator)
202
+ if inner is not None:
203
+ return inner
204
+ return (estimator, "feature_importances_")
205
+
206
+ if hasattr(estimator, "coef_"):
207
+ inner = _get_importance_estimator_inner(estimator)
208
+ if inner is not None:
209
+ return inner
210
+ return (estimator, "coef_")
211
+
212
+ return _get_importance_estimator_inner(estimator)
213
+
214
+
215
+ def _get_importance_estimator_inner(estimator) -> tuple[Any, str] | None:
216
+ """Check wrapped estimators for importance attributes."""
217
+ # Check estimator_ (sklearn fitted wrapper convention)
218
+ if hasattr(estimator, "estimator_") and estimator.estimator_ is not None:
219
+ result = _get_importance_estimator(estimator.estimator_)
220
+ if result is not None:
221
+ return result
222
+
223
+ # Check _est (GroupByEstimator convention)
224
+ if hasattr(estimator, "_est") and estimator._est is not None:
225
+ result = _get_importance_estimator(estimator._est)
226
+ if result is not None:
227
+ return result
228
+
229
+ return None
230
+
231
+
198
232
  class AutoPipeline(BaseEstimator):
199
233
  def __init__(
200
234
  self,
@@ -627,3 +661,107 @@ class AutoPipeline(BaseEstimator):
627
661
  all_features.append(ctx)
628
662
 
629
663
  return all_features
664
+
665
+ def _get_estimator_feature_names(self) -> list[str]:
666
+ """Get feature names as seen by the final estimator after all transformations."""
667
+ pre_out = list(self.sklearn_pipeline.named_steps["pre"].get_feature_names_out())
668
+
669
+ # Remove context columns dropped by "final" step
670
+ final_step = self.sklearn_pipeline.named_steps["final"]
671
+ drop_cols = final_step.kw_args.get("drop_cols", set()) if final_step.kw_args else set()
672
+ features = [f for f in pre_out if f not in drop_cols]
673
+
674
+ # Remove granularity columns (dropped by GroupByEstimator)
675
+ granularity_set = set(self.granularity)
676
+ features = [f for f in features if f not in granularity_set]
677
+
678
+ # Remove context features (used by wrapper estimators, not inner model)
679
+ context_set = set(self.context_feature_names)
680
+ features = [f for f in features if f not in context_set]
681
+
682
+ return features
683
+
684
+ def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
685
+ names = None
686
+ if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
687
+ names = list(estimator.feature_names_in_)
688
+ elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
689
+ names = list(estimator.feature_name_)
690
+ elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
691
+ names = list(estimator.feature_names_)
692
+ if names is None:
693
+ names = self._get_estimator_feature_names()
694
+ if len(names) != n_features:
695
+ raise ValueError(
696
+ f"Feature names length ({len(names)}) does not match importances length ({n_features})."
697
+ )
698
+ return names
699
+
700
+ @property
701
+ def feature_importances_(self) -> pd.DataFrame:
702
+ """Get feature importances from the fitted estimator.
703
+
704
+ Returns a DataFrame with columns ["feature", "importance"] sorted by
705
+ absolute importance descending. Works with tree-based models
706
+ (feature_importances_) and linear models (coef_).
707
+ """
708
+ if self.sklearn_pipeline is None:
709
+ raise RuntimeError("Pipeline not fitted. Call fit() first.")
710
+
711
+ est = self.sklearn_pipeline.named_steps["est"]
712
+ result = _get_importance_estimator(est)
713
+
714
+ if result is None:
715
+ raise RuntimeError(
716
+ "Estimator does not support feature importances. "
717
+ "Requires feature_importances_ or coef_ attribute."
718
+ )
719
+
720
+ inner_est, attr_name = result
721
+ raw = getattr(inner_est, attr_name)
722
+
723
+ if attr_name == "coef_":
724
+ # Linear models: use absolute value of coefficients
725
+ if raw.ndim == 2:
726
+ # Multi-class: average absolute values across classes
727
+ importances = np.abs(raw).mean(axis=0)
728
+ else:
729
+ importances = np.abs(raw)
730
+ else:
731
+ importances = raw
732
+
733
+ feature_names = self._get_estimator_feature_names()
734
+
735
+ df = pd.DataFrame({"feature": feature_names, "importance": importances})
736
+ df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
737
+ return df
738
+
739
+ @property
740
+ def feature_importance_names(self) -> dict[str, float]:
741
+ """Map deepest estimator feature names to importances."""
742
+ if self.sklearn_pipeline is None:
743
+ raise RuntimeError("Pipeline not fitted. Call fit() first.")
744
+
745
+ est = self.sklearn_pipeline.named_steps["est"]
746
+ result = _get_importance_estimator(est)
747
+
748
+ if result is None:
749
+ raise RuntimeError(
750
+ "Estimator does not support feature importances. "
751
+ "Requires feature_importances_ or coef_ attribute."
752
+ )
753
+
754
+ inner_est, attr_name = result
755
+ raw = getattr(inner_est, attr_name)
756
+
757
+ if attr_name == "coef_":
758
+ if raw.ndim == 2:
759
+ importances = np.abs(raw).mean(axis=0)
760
+ else:
761
+ importances = np.abs(raw)
762
+ else:
763
+ importances = raw
764
+
765
+ importances = np.asarray(importances)
766
+ feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
767
+ return dict(zip(feature_names, importances.tolist()))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.11
3
+ Version: 0.8.14
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -551,3 +551,175 @@ def test_autopipeline_is_picklable_after_fit():
551
551
  model.fit(df, y)
552
552
 
553
553
  pickle.dumps(model)
554
+
555
+
556
+ # --- Feature Importances Tests ---
557
+
558
+
559
+ def test_feature_importances__tree_model():
560
+ from sklearn.ensemble import RandomForestRegressor
561
+
562
+ df = pd.DataFrame(
563
+ {
564
+ "num1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
565
+ "num2": [10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
566
+ "cat1": ["a", "b", "a", "b", "a", "b"],
567
+ }
568
+ )
569
+ y = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="y")
570
+
571
+ model = AutoPipeline(
572
+ estimator=RandomForestRegressor(n_estimators=5, random_state=42),
573
+ estimator_features=["num1", "num2", "cat1"],
574
+ categorical_handling="ordinal",
575
+ )
576
+ model.fit(df, y)
577
+
578
+ importances = model.feature_importances_
579
+
580
+ assert isinstance(importances, pd.DataFrame)
581
+ assert list(importances.columns) == ["feature", "importance"]
582
+ assert len(importances) == 3
583
+ assert set(importances["feature"].tolist()) == {"num1", "num2", "cat1"}
584
+ assert all(importances["importance"] >= 0)
585
+
586
+
587
+ def test_feature_importances__linear_model():
588
+ df = pd.DataFrame(
589
+ {
590
+ "num1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
591
+ "num2": [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0],
592
+ }
593
+ )
594
+ y = pd.Series([0, 1, 0, 1, 0, 1, 0, 1], name="y")
595
+
596
+ model = AutoPipeline(
597
+ estimator=LogisticRegression(max_iter=1000),
598
+ estimator_features=["num1", "num2"],
599
+ scale_features=True,
600
+ )
601
+ model.fit(df, y)
602
+
603
+ importances = model.feature_importances_
604
+
605
+ assert isinstance(importances, pd.DataFrame)
606
+ assert list(importances.columns) == ["feature", "importance"]
607
+ assert len(importances) == 2
608
+ assert set(importances["feature"].tolist()) == {"num1", "num2"}
609
+ assert all(importances["importance"] >= 0)
610
+
611
+
612
+ def test_feature_importances__not_fitted_raises():
613
+ model = AutoPipeline(
614
+ estimator=LinearRegression(),
615
+ estimator_features=["x"],
616
+ )
617
+
618
+ with pytest.raises(RuntimeError, match="Pipeline not fitted"):
619
+ _ = model.feature_importances_
620
+
621
+
622
+ def test_feature_importances__unsupported_estimator_raises():
623
+ df = pd.DataFrame({"x": [1.0, 2.0, 3.0, 4.0]})
624
+ y = pd.Series([1.0, 2.0, 3.0, 4.0], name="y")
625
+
626
+ model = AutoPipeline(
627
+ estimator=DummyRegressor(),
628
+ estimator_features=["x"],
629
+ )
630
+ model.fit(df, y)
631
+
632
+ with pytest.raises(RuntimeError, match="does not support feature importances"):
633
+ _ = model.feature_importances_
634
+
635
+
636
+ def test_feature_importances__with_sklearn_enhancer():
637
+ from sklearn.ensemble import RandomForestRegressor
638
+
639
+ df = pd.DataFrame(
640
+ {
641
+ "num1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
642
+ "num2": [10.0, 20.0, 30.0, 40.0, 50.0, 60.0],
643
+ "start_date": ["2022-01-01", "2022-01-02", "2022-01-03", "2022-01-04", "2022-01-05", "2022-01-06"],
644
+ }
645
+ )
646
+ y = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="y")
647
+
648
+ inner = RandomForestRegressor(n_estimators=5, random_state=42)
649
+ enhancer = SkLearnEnhancerEstimator(
650
+ estimator=inner,
651
+ date_column="start_date",
652
+ day_weight_epsilon=0.1,
653
+ )
654
+
655
+ model = AutoPipeline(
656
+ estimator=enhancer,
657
+ estimator_features=["num1", "num2"],
658
+ )
659
+ model.fit(df, y)
660
+
661
+ importances = model.feature_importances_
662
+
663
+ assert isinstance(importances, pd.DataFrame)
664
+ assert list(importances.columns) == ["feature", "importance"]
665
+ assert len(importances) == 2
666
+ assert set(importances["feature"].tolist()) == {"num1", "num2"}
667
+
668
+
669
+ def test_feature_importances__onehot_features():
670
+ from sklearn.ensemble import RandomForestRegressor
671
+
672
+ df = pd.DataFrame(
673
+ {
674
+ "num1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
675
+ "cat1": ["a", "b", "c", "a", "b", "c"],
676
+ }
677
+ )
678
+ y = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="y")
679
+
680
+ model = AutoPipeline(
681
+ estimator=RandomForestRegressor(n_estimators=5, random_state=42),
682
+ estimator_features=["num1", "cat1"],
683
+ categorical_handling="onehot",
684
+ )
685
+ model.fit(df, y)
686
+
687
+ importances = model.feature_importances_
688
+
689
+ assert isinstance(importances, pd.DataFrame)
690
+ assert list(importances.columns) == ["feature", "importance"]
691
+ # Should have expanded features: num1 + cat1_a, cat1_b, cat1_c
692
+ assert len(importances) == 4
693
+ assert "num1" in importances["feature"].tolist()
694
+ assert any("cat1_" in f for f in importances["feature"].tolist())
695
+
696
+
697
+ def test_feature_importance_names__granularity_uses_deep_feature_names():
698
+ from sklearn.ensemble import RandomForestRegressor
699
+
700
+ df = pd.DataFrame(
701
+ {
702
+ "gameid": ["g1", "g1", "g2", "g2"],
703
+ "num1": [1.0, 2.0, 3.0, 4.0],
704
+ "num2": [10.0, 20.0, 30.0, 40.0],
705
+ }
706
+ )
707
+ y = pd.Series([1.0, 2.0, 3.0, 4.0], name="y")
708
+
709
+ model = AutoPipeline(
710
+ estimator=RandomForestRegressor(n_estimators=5, random_state=42),
711
+ estimator_features=["gameid", "num1", "num2"],
712
+ predictor_transformers=[AddConstantPredictionTransformer(col_name="const_pred")],
713
+ granularity=["gameid"],
714
+ categorical_features=["gameid"],
715
+ categorical_handling="ordinal",
716
+ remainder="drop",
717
+ )
718
+ model.fit(df, y)
719
+
720
+ names = model.feature_importance_names
721
+
722
+ inner = _inner_estimator(model)
723
+ assert list(names.keys()) == list(inner.feature_names_in_)
724
+ assert "gameid" not in names
725
+ assert "const_pred" in names
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes