spforge 0.8.13__py3-none-any.whl → 0.8.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spforge/autopipeline.py CHANGED
@@ -681,6 +681,22 @@ class AutoPipeline(BaseEstimator):
681
681
 
682
682
  return features
683
683
 
684
+ def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
685
+ names = None
686
+ if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
687
+ names = list(estimator.feature_names_in_)
688
+ elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
689
+ names = list(estimator.feature_name_)
690
+ elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
691
+ names = list(estimator.feature_names_)
692
+ if names is None:
693
+ names = self._get_estimator_feature_names()
694
+ if len(names) != n_features:
695
+ raise ValueError(
696
+ f"Feature names length ({len(names)}) does not match importances length ({n_features})."
697
+ )
698
+ return names
699
+
684
700
  @property
685
701
  def feature_importances_(self) -> pd.DataFrame:
686
702
  """Get feature importances from the fitted estimator.
@@ -719,3 +735,33 @@ class AutoPipeline(BaseEstimator):
719
735
  df = pd.DataFrame({"feature": feature_names, "importance": importances})
720
736
  df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
721
737
  return df
738
+
739
+ @property
740
+ def feature_importance_names(self) -> dict[str, float]:
741
+ """Map deepest estimator feature names to importances."""
742
+ if self.sklearn_pipeline is None:
743
+ raise RuntimeError("Pipeline not fitted. Call fit() first.")
744
+
745
+ est = self.sklearn_pipeline.named_steps["est"]
746
+ result = _get_importance_estimator(est)
747
+
748
+ if result is None:
749
+ raise RuntimeError(
750
+ "Estimator does not support feature importances. "
751
+ "Requires feature_importances_ or coef_ attribute."
752
+ )
753
+
754
+ inner_est, attr_name = result
755
+ raw = getattr(inner_est, attr_name)
756
+
757
+ if attr_name == "coef_":
758
+ if raw.ndim == 2:
759
+ importances = np.abs(raw).mean(axis=0)
760
+ else:
761
+ importances = np.abs(raw)
762
+ else:
763
+ importances = raw
764
+
765
+ importances = np.asarray(importances)
766
+ feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
767
+ return dict(zip(feature_names, importances.tolist()))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.13
3
+ Version: 0.8.14
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -14,7 +14,7 @@ examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
14
14
  examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
15
15
  examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
16
16
  spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
17
- spforge/autopipeline.py,sha256=4DAm_Wxtzvum3_7OViRru22HPD4TPHrjtlsalvtkzKM,26834
17
+ spforge/autopipeline.py,sha256=q3EbeMvSuxyyaDj_uKSxdcCHlvORXAVJ4JAPNbeYvCs,28744
18
18
  spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
19
19
  spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
20
20
  spforge/features_generator_pipeline.py,sha256=n8vzZKqXNFcFRDWZhllnkhAh5NFXdOD3FEIOpHcay8E,8208
@@ -71,8 +71,8 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
71
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
72
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
73
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
74
- spforge-0.8.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
- tests/test_autopipeline.py,sha256=i4DBraTZT0_OQ7kabTausV5LsyvMhfvKTxCd-Gtz00U,21269
74
+ spforge-0.8.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
75
+ tests/test_autopipeline.py,sha256=KHDuhPOzfezKj_FB7k5XQhxsreLFEiGUEYvybZZn-YA,22258
76
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
77
77
  tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
78
78
  tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
@@ -107,7 +107,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
107
107
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
108
108
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
109
109
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
110
- spforge-0.8.13.dist-info/METADATA,sha256=cGBuOH1Pk7txtWsIMtIhWNBqNQwYILyxoZVjr3jzKHw,20048
111
- spforge-0.8.13.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
- spforge-0.8.13.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
- spforge-0.8.13.dist-info/RECORD,,
110
+ spforge-0.8.14.dist-info/METADATA,sha256=QjJItgB1kBct3TPaiUbTmy-wkqw2hVG69tRq55WFXWU,20048
111
+ spforge-0.8.14.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
112
+ spforge-0.8.14.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
113
+ spforge-0.8.14.dist-info/RECORD,,
@@ -692,3 +692,34 @@ def test_feature_importances__onehot_features():
692
692
  assert len(importances) == 4
693
693
  assert "num1" in importances["feature"].tolist()
694
694
  assert any("cat1_" in f for f in importances["feature"].tolist())
695
+
696
+
697
+ def test_feature_importance_names__granularity_uses_deep_feature_names():
698
+ from sklearn.ensemble import RandomForestRegressor
699
+
700
+ df = pd.DataFrame(
701
+ {
702
+ "gameid": ["g1", "g1", "g2", "g2"],
703
+ "num1": [1.0, 2.0, 3.0, 4.0],
704
+ "num2": [10.0, 20.0, 30.0, 40.0],
705
+ }
706
+ )
707
+ y = pd.Series([1.0, 2.0, 3.0, 4.0], name="y")
708
+
709
+ model = AutoPipeline(
710
+ estimator=RandomForestRegressor(n_estimators=5, random_state=42),
711
+ estimator_features=["gameid", "num1", "num2"],
712
+ predictor_transformers=[AddConstantPredictionTransformer(col_name="const_pred")],
713
+ granularity=["gameid"],
714
+ categorical_features=["gameid"],
715
+ categorical_handling="ordinal",
716
+ remainder="drop",
717
+ )
718
+ model.fit(df, y)
719
+
720
+ names = model.feature_importance_names
721
+
722
+ inner = _inner_estimator(model)
723
+ assert list(names.keys()) == list(inner.feature_names_in_)
724
+ assert "gameid" not in names
725
+ assert "const_pred" in names