spforge 0.8.13__tar.gz → 0.8.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spforge might be problematic. Click here for more details.

Files changed (119)
  1. {spforge-0.8.13/spforge.egg-info → spforge-0.8.15}/PKG-INFO +1 -1
  2. {spforge-0.8.13 → spforge-0.8.15}/pyproject.toml +1 -1
  3. {spforge-0.8.13 → spforge-0.8.15}/spforge/autopipeline.py +66 -4
  4. {spforge-0.8.13 → spforge-0.8.15/spforge.egg-info}/PKG-INFO +1 -1
  5. {spforge-0.8.13 → spforge-0.8.15}/tests/test_autopipeline.py +53 -0
  6. {spforge-0.8.13 → spforge-0.8.15}/LICENSE +0 -0
  7. {spforge-0.8.13 → spforge-0.8.15}/MANIFEST.in +0 -0
  8. {spforge-0.8.13 → spforge-0.8.15}/README.md +0 -0
  9. {spforge-0.8.13 → spforge-0.8.15}/examples/__init__.py +0 -0
  10. {spforge-0.8.13 → spforge-0.8.15}/examples/game_level_example.py +0 -0
  11. {spforge-0.8.13 → spforge-0.8.15}/examples/lol/__init__.py +0 -0
  12. {spforge-0.8.13 → spforge-0.8.15}/examples/lol/data/__init__.py +0 -0
  13. {spforge-0.8.13 → spforge-0.8.15}/examples/lol/data/subsample_lol_data.parquet +0 -0
  14. {spforge-0.8.13 → spforge-0.8.15}/examples/lol/data/utils.py +0 -0
  15. {spforge-0.8.13 → spforge-0.8.15}/examples/lol/pipeline_transformer_example.py +0 -0
  16. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/__init__.py +0 -0
  17. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/cross_validation_example.py +0 -0
  18. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/data/__init__.py +0 -0
  19. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/data/game_player_subsample.parquet +0 -0
  20. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/data/utils.py +0 -0
  21. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/feature_engineering_example.py +0 -0
  22. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/game_winner_example.py +0 -0
  23. {spforge-0.8.13 → spforge-0.8.15}/examples/nba/predictor_transformers_example.py +0 -0
  24. {spforge-0.8.13 → spforge-0.8.15}/setup.cfg +0 -0
  25. {spforge-0.8.13 → spforge-0.8.15}/spforge/__init__.py +0 -0
  26. {spforge-0.8.13 → spforge-0.8.15}/spforge/base_feature_generator.py +0 -0
  27. {spforge-0.8.13 → spforge-0.8.15}/spforge/cross_validator/__init__.py +0 -0
  28. {spforge-0.8.13 → spforge-0.8.15}/spforge/cross_validator/_base.py +0 -0
  29. {spforge-0.8.13 → spforge-0.8.15}/spforge/cross_validator/cross_validator.py +0 -0
  30. {spforge-0.8.13 → spforge-0.8.15}/spforge/data_structures.py +0 -0
  31. {spforge-0.8.13 → spforge-0.8.15}/spforge/distributions/__init__.py +0 -0
  32. {spforge-0.8.13 → spforge-0.8.15}/spforge/distributions/_negative_binomial_estimator.py +0 -0
  33. {spforge-0.8.13 → spforge-0.8.15}/spforge/distributions/_normal_distribution_predictor.py +0 -0
  34. {spforge-0.8.13 → spforge-0.8.15}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
  35. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/__init__.py +0 -0
  36. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_conditional_estimator.py +0 -0
  37. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
  38. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_granularity_estimator.py +0 -0
  39. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_group_by_estimator.py +0 -0
  40. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_ordinal_classifier.py +0 -0
  41. {spforge-0.8.13 → spforge-0.8.15}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
  42. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/__init__.py +0 -0
  43. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_base.py +0 -0
  44. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_lag.py +0 -0
  45. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_net_over_predicted.py +0 -0
  46. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
  47. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
  48. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
  49. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_rolling_mean_days.py +0 -0
  50. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_rolling_window.py +0 -0
  51. {spforge-0.8.13 → spforge-0.8.15}/spforge/feature_generator/_utils.py +0 -0
  52. {spforge-0.8.13 → spforge-0.8.15}/spforge/features_generator_pipeline.py +0 -0
  53. {spforge-0.8.13 → spforge-0.8.15}/spforge/hyperparameter_tuning/__init__.py +0 -0
  54. {spforge-0.8.13 → spforge-0.8.15}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
  55. {spforge-0.8.13 → spforge-0.8.15}/spforge/hyperparameter_tuning/_tuner.py +0 -0
  56. {spforge-0.8.13 → spforge-0.8.15}/spforge/performance_transformers/__init__.py +0 -0
  57. {spforge-0.8.13 → spforge-0.8.15}/spforge/performance_transformers/_performance_manager.py +0 -0
  58. {spforge-0.8.13 → spforge-0.8.15}/spforge/performance_transformers/_performances_transformers.py +0 -0
  59. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/__init__.py +0 -0
  60. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/_base.py +0 -0
  61. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/_player_rating.py +0 -0
  62. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/_team_rating.py +0 -0
  63. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/enums.py +0 -0
  64. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/league_identifier.py +0 -0
  65. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/league_start_rating_optimizer.py +0 -0
  66. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/player_performance_predictor.py +0 -0
  67. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/start_rating_generator.py +0 -0
  68. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/team_performance_predictor.py +0 -0
  69. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/team_start_rating_generator.py +0 -0
  70. {spforge-0.8.13 → spforge-0.8.15}/spforge/ratings/utils.py +0 -0
  71. {spforge-0.8.13 → spforge-0.8.15}/spforge/scorer/__init__.py +0 -0
  72. {spforge-0.8.13 → spforge-0.8.15}/spforge/scorer/_score.py +0 -0
  73. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/__init__.py +0 -0
  74. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_base.py +0 -0
  75. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_net_over_predicted.py +0 -0
  76. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_operator.py +0 -0
  77. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_other_transformer.py +0 -0
  78. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_predictor.py +0 -0
  79. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_simple_transformer.py +0 -0
  80. {spforge-0.8.13 → spforge-0.8.15}/spforge/transformers/_team_ratio_predictor.py +0 -0
  81. {spforge-0.8.13 → spforge-0.8.15}/spforge/utils.py +0 -0
  82. {spforge-0.8.13 → spforge-0.8.15}/spforge.egg-info/SOURCES.txt +0 -0
  83. {spforge-0.8.13 → spforge-0.8.15}/spforge.egg-info/dependency_links.txt +0 -0
  84. {spforge-0.8.13 → spforge-0.8.15}/spforge.egg-info/requires.txt +0 -0
  85. {spforge-0.8.13 → spforge-0.8.15}/spforge.egg-info/top_level.txt +0 -0
  86. {spforge-0.8.13 → spforge-0.8.15}/tests/cross_validator/test_cross_validator.py +0 -0
  87. {spforge-0.8.13 → spforge-0.8.15}/tests/distributions/test_distribution.py +0 -0
  88. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
  89. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
  90. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_lol_player_kills.py +0 -0
  91. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_nba_player_points.py +0 -0
  92. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
  93. {spforge-0.8.13 → spforge-0.8.15}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
  94. {spforge-0.8.13 → spforge-0.8.15}/tests/estimator/test_sklearn_estimator.py +0 -0
  95. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_lag.py +0 -0
  96. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
  97. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
  98. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
  99. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_rolling_mean_days.py +0 -0
  100. {spforge-0.8.13 → spforge-0.8.15}/tests/feature_generator/test_rolling_window.py +0 -0
  101. {spforge-0.8.13 → spforge-0.8.15}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
  102. {spforge-0.8.13 → spforge-0.8.15}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
  103. {spforge-0.8.13 → spforge-0.8.15}/tests/performance_transformers/test_performance_manager.py +0 -0
  104. {spforge-0.8.13 → spforge-0.8.15}/tests/performance_transformers/test_performances_transformers.py +0 -0
  105. {spforge-0.8.13 → spforge-0.8.15}/tests/ratings/test_player_rating_generator.py +0 -0
  106. {spforge-0.8.13 → spforge-0.8.15}/tests/ratings/test_player_rating_no_mutation.py +0 -0
  107. {spforge-0.8.13 → spforge-0.8.15}/tests/ratings/test_ratings_property.py +0 -0
  108. {spforge-0.8.13 → spforge-0.8.15}/tests/ratings/test_team_rating_generator.py +0 -0
  109. {spforge-0.8.13 → spforge-0.8.15}/tests/ratings/test_utils_scaled_weights.py +0 -0
  110. {spforge-0.8.13 → spforge-0.8.15}/tests/scorer/test_score.py +0 -0
  111. {spforge-0.8.13 → spforge-0.8.15}/tests/scorer/test_score_aggregation_granularity.py +0 -0
  112. {spforge-0.8.13 → spforge-0.8.15}/tests/test_autopipeline_context.py +0 -0
  113. {spforge-0.8.13 → spforge-0.8.15}/tests/test_feature_generator_pipeline.py +0 -0
  114. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_estimator_transformer_context.py +0 -0
  115. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_net_over_predicted.py +0 -0
  116. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_other_transformer.py +0 -0
  117. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_predictor_transformer.py +0 -0
  118. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_simple_transformer.py +0 -0
  119. {spforge-0.8.13 → spforge-0.8.15}/tests/transformers/test_team_ratio_predictor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.13
3
+ Version: 0.8.15
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spforge"
7
- version = "0.8.13"
7
+ version = "0.8.15"
8
8
  description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -264,6 +264,7 @@ class AutoPipeline(BaseEstimator):
264
264
  self.numeric_features = numeric_features
265
265
  self.remainder = remainder
266
266
  self._cat_feats = []
267
+ self._filter_feature_names: list[str] = []
267
268
 
268
269
  # Auto-compute context features
269
270
  self.context_feature_names = self._compute_context_features()
@@ -276,11 +277,12 @@ class AutoPipeline(BaseEstimator):
276
277
  self._resolved_categorical_handling: CategoricalHandling | None = None
277
278
 
278
279
  def _compute_context_features(self) -> list[str]:
279
- """Auto-compute context features from estimator, granularity, and filters.
280
+ """Auto-compute context features from estimator and granularity.
280
281
 
281
282
  Note: Context from predictor_transformers is tracked separately in
282
283
  context_predictor_transformer_feature_names and is dropped before
283
- the final estimator.
284
+ the final estimator. Filter columns are tracked separately and are
285
+ dropped before the final estimator.
284
286
  """
285
287
  from spforge.transformers._base import PredictorTransformer
286
288
 
@@ -325,8 +327,10 @@ class AutoPipeline(BaseEstimator):
325
327
  context.extend(self.granularity)
326
328
 
327
329
  # Add filter columns
330
+ self._filter_feature_names = []
328
331
  for f in self.filters:
329
- context.append(f.column_name)
332
+ if f.column_name not in self._filter_feature_names:
333
+ self._filter_feature_names.append(f.column_name)
330
334
 
331
335
  # Dedupe while preserving order, excluding estimator_features
332
336
  seen = set()
@@ -540,8 +544,10 @@ class AutoPipeline(BaseEstimator):
540
544
  prev_transformer_feats_out.extend(feats_out)
541
545
 
542
546
  # Use FunctionTransformer with global function for serializability
547
+ drop_filter_cols = set(self._filter_feature_names)
548
+ drop_cols = drop_ctx_set | drop_filter_cols
543
549
  final = FunctionTransformer(
544
- _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_ctx_set}
550
+ _drop_columns_transformer, validate=False, kw_args={"drop_cols": drop_cols}
545
551
  )
546
552
  steps.append(("final", final))
547
553
 
@@ -572,6 +578,7 @@ class AutoPipeline(BaseEstimator):
572
578
  self.feature_names
573
579
  + self.context_feature_names
574
580
  + self.context_predictor_transformer_feature_names
581
+ + self._filter_feature_names
575
582
  + self.granularity
576
583
  )
577
584
  )
@@ -660,6 +667,11 @@ class AutoPipeline(BaseEstimator):
660
667
  if ctx not in all_features:
661
668
  all_features.append(ctx)
662
669
 
670
+ # Add filter columns (needed for fit-time filtering)
671
+ for col in self._filter_feature_names:
672
+ if col not in all_features:
673
+ all_features.append(col)
674
+
663
675
  return all_features
664
676
 
665
677
  def _get_estimator_feature_names(self) -> list[str]:
@@ -679,8 +691,28 @@ class AutoPipeline(BaseEstimator):
679
691
  context_set = set(self.context_feature_names)
680
692
  features = [f for f in features if f not in context_set]
681
693
 
694
+ # Remove filter columns (used only for fit-time filtering)
695
+ filter_set = set(self._filter_feature_names)
696
+ features = [f for f in features if f not in filter_set]
697
+
682
698
  return features
683
699
 
700
+ def _resolve_importance_feature_names(self, estimator, n_features: int) -> list[str]:
701
+ names = None
702
+ if hasattr(estimator, "feature_names_in_") and estimator.feature_names_in_ is not None:
703
+ names = list(estimator.feature_names_in_)
704
+ elif hasattr(estimator, "feature_name_") and estimator.feature_name_ is not None:
705
+ names = list(estimator.feature_name_)
706
+ elif hasattr(estimator, "feature_names_") and estimator.feature_names_ is not None:
707
+ names = list(estimator.feature_names_)
708
+ if names is None:
709
+ names = self._get_estimator_feature_names()
710
+ if len(names) != n_features:
711
+ raise ValueError(
712
+ f"Feature names length ({len(names)}) does not match importances length ({n_features})."
713
+ )
714
+ return names
715
+
684
716
  @property
685
717
  def feature_importances_(self) -> pd.DataFrame:
686
718
  """Get feature importances from the fitted estimator.
@@ -719,3 +751,33 @@ class AutoPipeline(BaseEstimator):
719
751
  df = pd.DataFrame({"feature": feature_names, "importance": importances})
720
752
  df = df.sort_values("importance", ascending=False, key=abs).reset_index(drop=True)
721
753
  return df
754
+
755
+ @property
756
+ def feature_importance_names(self) -> dict[str, float]:
757
+ """Map deepest estimator feature names to importances."""
758
+ if self.sklearn_pipeline is None:
759
+ raise RuntimeError("Pipeline not fitted. Call fit() first.")
760
+
761
+ est = self.sklearn_pipeline.named_steps["est"]
762
+ result = _get_importance_estimator(est)
763
+
764
+ if result is None:
765
+ raise RuntimeError(
766
+ "Estimator does not support feature importances. "
767
+ "Requires feature_importances_ or coef_ attribute."
768
+ )
769
+
770
+ inner_est, attr_name = result
771
+ raw = getattr(inner_est, attr_name)
772
+
773
+ if attr_name == "coef_":
774
+ if raw.ndim == 2:
775
+ importances = np.abs(raw).mean(axis=0)
776
+ else:
777
+ importances = np.abs(raw)
778
+ else:
779
+ importances = raw
780
+
781
+ importances = np.asarray(importances)
782
+ feature_names = self._resolve_importance_feature_names(inner_est, len(importances))
783
+ return dict(zip(feature_names, importances.tolist()))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.13
3
+ Version: 0.8.15
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -12,6 +12,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression
12
12
 
13
13
  from spforge import AutoPipeline
14
14
  from spforge.estimator import SkLearnEnhancerEstimator
15
+ from spforge.scorer import Filter, Operator
15
16
  from spforge.transformers import EstimatorTransformer
16
17
 
17
18
 
@@ -231,6 +232,27 @@ def test_predict_proba(df_clf):
231
232
  assert np.allclose(proba.sum(axis=1), 1.0, atol=1e-6)
232
233
 
233
234
 
235
+ def test_filter_columns_not_passed_to_estimator(frame):
236
+ df_pd = pd.DataFrame(
237
+ {"x": [1.0, 2.0, 3.0, 4.0], "keep": [1, 0, 1, 0], "y": [1.0, 2.0, 3.0, 4.0]}
238
+ )
239
+ df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
240
+
241
+ model = AutoPipeline(
242
+ estimator=CaptureEstimator(),
243
+ estimator_features=["x"],
244
+ filters=[Filter(column_name="keep", value=1, operator=Operator.EQUALS)],
245
+ )
246
+
247
+ X = _select(df, ["x", "keep"])
248
+ y = _col(df, "y")
249
+ model.fit(X, y=y)
250
+
251
+ est = _inner_estimator(model)
252
+ assert "keep" in model.required_features
253
+ assert "keep" not in est.fit_columns
254
+
255
+
234
256
  def test_predict_proba_raises_if_not_supported(df_reg):
235
257
  model = AutoPipeline(
236
258
  estimator=LinearRegression(),
@@ -692,3 +714,34 @@ def test_feature_importances__onehot_features():
692
714
  assert len(importances) == 4
693
715
  assert "num1" in importances["feature"].tolist()
694
716
  assert any("cat1_" in f for f in importances["feature"].tolist())
717
+
718
+
719
+ def test_feature_importance_names__granularity_uses_deep_feature_names():
720
+ from sklearn.ensemble import RandomForestRegressor
721
+
722
+ df = pd.DataFrame(
723
+ {
724
+ "gameid": ["g1", "g1", "g2", "g2"],
725
+ "num1": [1.0, 2.0, 3.0, 4.0],
726
+ "num2": [10.0, 20.0, 30.0, 40.0],
727
+ }
728
+ )
729
+ y = pd.Series([1.0, 2.0, 3.0, 4.0], name="y")
730
+
731
+ model = AutoPipeline(
732
+ estimator=RandomForestRegressor(n_estimators=5, random_state=42),
733
+ estimator_features=["gameid", "num1", "num2"],
734
+ predictor_transformers=[AddConstantPredictionTransformer(col_name="const_pred")],
735
+ granularity=["gameid"],
736
+ categorical_features=["gameid"],
737
+ categorical_handling="ordinal",
738
+ remainder="drop",
739
+ )
740
+ model.fit(df, y)
741
+
742
+ names = model.feature_importance_names
743
+
744
+ inner = _inner_estimator(model)
745
+ assert list(names.keys()) == list(inner.feature_names_in_)
746
+ assert "gameid" not in names
747
+ assert "const_pred" in names
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes