spforge 0.8.2__py3-none-any.whl → 0.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
+ import numpy as np
+ import pandas as pd
+ import pytest
+ from sklearn.base import BaseEstimator
+ from sklearn.linear_model import LogisticRegression
+
+ from spforge import EstimatorHyperparameterTuner, ParamSpec
+ from spforge.cross_validator import MatchKFoldCrossValidator
+ from spforge.estimator import SkLearnEnhancerEstimator
+ from spforge.scorer import MeanBiasScorer
+
+
+ class FakeLGBMClassifier(BaseEstimator):
+     __module__ = "lightgbm.sklearn"
+
+     def __init__(
+         self,
+         n_estimators: int = 100,
+         num_leaves: int = 31,
+         max_depth: int = 5,
+         min_child_samples: int = 20,
+         subsample: float = 1.0,
+         subsample_freq: int = 1,
+         reg_alpha: float = 0.0,
+         reg_lambda: float = 0.0,
+     ):
+         self.n_estimators = n_estimators
+         self.num_leaves = num_leaves
+         self.max_depth = max_depth
+         self.min_child_samples = min_child_samples
+         self.subsample = subsample
+         self.subsample_freq = subsample_freq
+         self.reg_alpha = reg_alpha
+         self.reg_lambda = reg_lambda
+
+     def fit(self, X, y):
+         self.classes_ = np.unique(y)
+         return self
+
+     def predict_proba(self, X):
+         n = len(X)
+         if len(self.classes_) < 2:
+             return np.ones((n, 1))
+         return np.tile([0.4, 0.6], (n, 1))
+
+     def predict(self, X):
+         n = len(X)
+         if len(self.classes_) == 1:
+             return np.full(n, self.classes_[0])
+         proba = self.predict_proba(X)
+         idx = np.argmax(proba, axis=1)
+         return np.array(self.classes_)[idx]
+
+
+ @pytest.fixture
+ def sample_df():
+     dates = pd.date_range("2024-01-01", periods=12, freq="D")
+     rows = []
+     for i, date in enumerate(dates):
+         rows.append(
+             {
+                 "mid": f"M{i // 2}",
+                 "date": date,
+                 "x1": float(i),
+                 "y": 1 if i % 2 == 0 else 0,
+             }
+         )
+     return pd.DataFrame(rows)
+
+
+ @pytest.fixture
+ def scorer():
+     return MeanBiasScorer(
+         pred_column="y_pred",
+         target="y",
+         validation_column="is_validation",
+     )
+
+
+ def test_estimator_tuner_requires_search_space(sample_df, scorer):
+     estimator = LogisticRegression()
+
+     cv = MatchKFoldCrossValidator(
+         match_id_column_name="mid",
+         date_column_name="date",
+         target_column="y",
+         estimator=estimator,
+         prediction_column_name="y_pred",
+         n_splits=2,
+         features=["x1"],
+     )
+
+     tuner = EstimatorHyperparameterTuner(
+         estimator=estimator,
+         cross_validator=cv,
+         scorer=scorer,
+         direction="minimize",
+         n_trials=2,
+         show_progress_bar=False,
+     )
+
+     with pytest.raises(ValueError, match="param_search_space is required"):
+         tuner.optimize(sample_df)
+
+
+ def test_estimator_tuner_custom_search_space(sample_df, scorer):
+     estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
+
+     cv = MatchKFoldCrossValidator(
+         match_id_column_name="mid",
+         date_column_name="date",
+         target_column="y",
+         estimator=estimator,
+         prediction_column_name="y_pred",
+         n_splits=2,
+         features=["x1"],
+     )
+
+     tuner = EstimatorHyperparameterTuner(
+         estimator=estimator,
+         cross_validator=cv,
+         scorer=scorer,
+         direction="minimize",
+         param_search_space={
+             "C": ParamSpec(
+                 param_type="float",
+                 low=0.1,
+                 high=2.0,
+                 log=True,
+             )
+         },
+         n_trials=2,
+         show_progress_bar=False,
+     )
+
+     result = tuner.optimize(sample_df)
+
+     assert "estimator__C" in result.best_params
+     assert isinstance(result.best_value, float)
+
+
+ def test_estimator_tuner_lgbm_defaults(sample_df, scorer):
+     estimator = FakeLGBMClassifier()
+
+     cv = MatchKFoldCrossValidator(
+         match_id_column_name="mid",
+         date_column_name="date",
+         target_column="y",
+         estimator=estimator,
+         prediction_column_name="y_pred",
+         n_splits=2,
+         features=["x1"],
+     )
+
+     tuner = EstimatorHyperparameterTuner(
+         estimator=estimator,
+         cross_validator=cv,
+         scorer=scorer,
+         direction="minimize",
+         n_trials=2,
+         show_progress_bar=False,
+     )
+
+     result = tuner.optimize(sample_df)
+
+     assert "n_estimators" in result.best_params
+     assert isinstance(result.best_value, float)
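A reading of the three tests above (inferred from their assertions, not from spforge documentation): FakeLGBMClassifier only spoofs __module__ = "lightgbm.sklearn" so the tuner treats it as a LightGBM-style estimator and falls back to a built-in default search space, which is why "n_estimators" appears in best_params without any param_search_space; and when the tuned estimator wraps an inner model, ParamSpec keys surface with an "estimator__" prefix, matching scikit-learn's nested-parameter convention. A minimal sketch of that convention in plain scikit-learn:

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Nested components are addressed with double underscores, which is why a tuned
# "C" on a wrapped LogisticRegression would surface as "estimator__C".
pipe = Pipeline([("estimator", LogisticRegression())])
pipe.set_params(estimator__C=0.5)
assert pipe.get_params()["estimator__C"] == 0.5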
@@ -1662,3 +1662,30 @@ def test_player_rating_team_with_strong_offense_and_weak_defense_gets_expected_r
 
     assert a_off > start_rating
     assert a_def < start_rating
+
+
+ def test_fit_transform__player_rating_difference_from_team_projected_feature(base_cn, sample_df):
+     """PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED computes player_off_rating - team_off_rating_projected."""
+     gen = PlayerRatingGenerator(
+         performance_column="perf",
+         column_names=base_cn,
+         auto_scale_performance=True,
+         features_out=[
+             RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED,
+             RatingKnownFeatures.PLAYER_OFF_RATING,
+             RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED,
+         ],
+     )
+     result = gen.fit_transform(sample_df)
+
+     diff_col = "player_rating_difference_from_team_projected_perf"
+     player_col = "player_off_rating_perf"
+     team_col = "team_off_rating_projected_perf"
+
+     assert diff_col in result.columns
+     assert player_col in result.columns
+     assert team_col in result.columns
+
+     for row in result.iter_rows(named=True):
+         expected = row[player_col] - row[team_col]
+         assert row[diff_col] == pytest.approx(expected, rel=1e-9)
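Because the test iterates with iter_rows(named=True), result is a polars DataFrame, so the per-row check above can also be stated as one vectorized expression; a sketch reusing the column names from the test:

import polars as pl

# Vectorized restatement of the row loop above: the projected-difference feature
# should equal player off-rating minus projected team off-rating everywhere.
max_abs_err = result.select(
    (
        pl.col("player_off_rating_perf")
        - pl.col("team_off_rating_projected_perf")
        - pl.col("player_rating_difference_from_team_projected_perf")
    )
    .abs()
    .max()
).item()
assert max_abs_err < 1e-9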
@@ -372,6 +372,136 @@ def test_pwmse_compare_to_naive_granularity(df_type):
     assert abs(score - expected) < 1e-10
 
 
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_pwmse__evaluation_labels_slices_predictions(df_type):
+     """PWMSE with evaluation_labels should only score on specified labels."""
+     # Predictions have 5 labels: [-2, -1, 0, 1, 2]
+     # But we only want to evaluate on inner labels: [-1, 0, 1]
+     df = create_dataframe(
+         df_type,
+         {
+             "pred": [
+                 [0.1, 0.2, 0.4, 0.2, 0.1],  # Full distribution over 5 labels
+                 [0.05, 0.15, 0.5, 0.2, 0.1],
+             ],
+             "target": [0, 1],
+         },
+     )
+
+     # Score with all labels
+     scorer_full = PWMSE(pred_column="pred", target="target", labels=[-2, -1, 0, 1, 2])
+     score_full = scorer_full.score(df)
+
+     # Score with evaluation_labels excluding boundaries
+     scorer_eval = PWMSE(
+         pred_column="pred",
+         target="target",
+         labels=[-2, -1, 0, 1, 2],
+         evaluation_labels=[-1, 0, 1],
+     )
+     score_eval = scorer_eval.score(df)
+
+     # Scores should be different because evaluation_labels excludes boundary penalties
+     assert score_full != score_eval
+
+     # Manual calculation for evaluation_labels case:
+     # Slice predictions to indices 1, 2, 3 (corresponding to labels -1, 0, 1)
+     # Then renormalize
+     preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1], [0.05, 0.15, 0.5, 0.2, 0.1]])
+     preds_sliced = preds_full[:, 1:4]  # [-1, 0, 1]
+     preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+     eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+     targets = np.array([0, 1], dtype=np.float64)
+     diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+     expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+     assert abs(score_eval - expected) < 1e-10
+
+
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_pwmse__evaluation_labels_with_compare_to_naive(df_type):
+     """PWMSE evaluation_labels should also affect naive baseline calculation."""
+     df = create_dataframe(
+         df_type,
+         {
+             "pred": [
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+             ],
+             "target": [-1, 0, 0, 1],  # Targets within evaluation range
+         },
+     )
+
+     scorer = PWMSE(
+         pred_column="pred",
+         target="target",
+         labels=[-2, -1, 0, 1, 2],
+         evaluation_labels=[-1, 0, 1],
+         compare_to_naive=True,
+     )
+     score = scorer.score(df)
+
+     # Naive should be computed using only evaluation_labels
+     # With targets [-1, 0, 0, 1], naive probs are [1/4, 2/4, 1/4] for labels [-1, 0, 1]
+     eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+     targets = np.array([-1, 0, 0, 1], dtype=np.float64)
+
+     # Model predictions sliced and renormalized
+     preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]] * 4)
+     preds_sliced = preds_full[:, 1:4]
+     preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+     diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+     model_score = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+     # Naive predictions for evaluation_labels only
+     naive_probs = np.array([0.25, 0.5, 0.25])  # Based on target distribution
+     naive_preds = np.tile(naive_probs, (4, 1))
+     naive_score = float((diffs_sqd * naive_preds).sum(axis=1).mean())
+
+     expected = naive_score - model_score
+     assert abs(score - expected) < 1e-10
+
+
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_pwmse__evaluation_labels_filters_targets_outside_range(df_type):
+     """PWMSE should filter out targets outside evaluation_labels range."""
+     df = create_dataframe(
+         df_type,
+         {
+             "pred": [
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+                 [0.1, 0.2, 0.4, 0.2, 0.1],
+             ],
+             "target": [-2, 0, 2],  # -2 and 2 are outside evaluation range [-1, 0, 1]
+         },
+     )
+
+     scorer = PWMSE(
+         pred_column="pred",
+         target="target",
+         labels=[-2, -1, 0, 1, 2],
+         evaluation_labels=[-1, 0, 1],
+     )
+     score = scorer.score(df)
+
+     # Should only use the row with target=0
+     preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]])
+     preds_sliced = preds_full[:, 1:4]
+     preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+     eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+     targets = np.array([0], dtype=np.float64)
+     diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+     expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+     assert abs(score - expected) < 1e-10
+
+
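The three tests above spell out the same reference computation by hand; restated once as a standalone NumPy helper (hypothetical name, mirroring only what the tests compute: slice the distribution to the evaluated labels, renormalize, and average the probability-weighted squared errors):

import numpy as np

def manual_pwmse(preds, targets, labels, evaluation_labels):
    # Keep only the columns of the predicted distribution that correspond to the
    # evaluated labels, renormalize, then average probability-weighted squared errors.
    preds = np.asarray(preds, dtype=np.float64)
    idx = [list(labels).index(label) for label in evaluation_labels]
    sliced = preds[:, idx]
    renorm = sliced / sliced.sum(axis=1, keepdims=True)
    eval_arr = np.asarray(evaluation_labels, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    diffs_sqd = (eval_arr[None, :] - targets[:, None]) ** 2
    return float((diffs_sqd * renorm).sum(axis=1).mean())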
 # ============================================================================
 # D. MeanBiasScorer Tests
 # ============================================================================
@@ -1762,6 +1892,129 @@ def test_pwmse__accepts_ndarray_predictions(df_type):
     assert score >= 0
 
 
+ # ============================================================================
+ # ThresholdEventScorer with granularity and compare_to_naive Tests
+ # ============================================================================
+
+
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_threshold_event_scorer__granularity_with_compare_to_naive(df_type):
+     """ThresholdEventScorer fails when combining compare_to_naive with granularity.
+
+     Bug: When granularity is set, binary_scorer.score() returns a dict, but
+     the naive comparison tries to do dict - dict which fails with:
+     'unsupported operand type(s) for -: 'dict' and 'dict''
+     """
+     df = create_dataframe(
+         df_type,
+         {
+             "qtr": [1, 1, 1, 2, 2, 2],
+             "dist": [
+                 [0.1, 0.2, 0.3, 0.4],
+                 [0.2, 0.3, 0.3, 0.2],
+                 [0.3, 0.4, 0.2, 0.1],
+                 [0.4, 0.3, 0.2, 0.1],
+                 [0.1, 0.1, 0.4, 0.4],
+                 [0.2, 0.2, 0.3, 0.3],
+             ],
+             "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+             "rush_yards": [3, 2, 0, 1, 2, 4],
+         },
+     )
+
+     scorer = ThresholdEventScorer(
+         dist_column="dist",
+         threshold_column="ydstogo",
+         outcome_column="rush_yards",
+         labels=[0, 1, 2, 3],
+         compare_to_naive=True,
+         granularity=["qtr"],
+     )
+
+     result = scorer.score(df)
+
+     assert isinstance(result, dict)
+     assert len(result) == 2
+     assert (1,) in result
+     assert (2,) in result
+     assert all(isinstance(v, float) for v in result.values())
+
+
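The docstring above pins the old failure on a scalar-style subtraction being applied to per-group results; the behaviour the assertions now expect (an assumption about intent, not spforge's actual implementation) amounts to a key-wise subtraction, roughly:

def naive_minus_model(naive_scores: dict, model_scores: dict) -> dict:
    # Hypothetical per-group comparison: subtract group by group instead of
    # attempting dict - dict, which raises a TypeError.
    return {group: naive_scores[group] - model_scores[group] for group in model_scores}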
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_threshold_event_scorer__granularity_with_compare_to_naive_and_naive_granularity(df_type):
+     """ThresholdEventScorer with both granularity and naive_granularity."""
+     df = create_dataframe(
+         df_type,
+         {
+             "qtr": [1, 1, 1, 2, 2, 2],
+             "team": ["A", "A", "B", "A", "B", "B"],
+             "dist": [
+                 [0.1, 0.2, 0.3, 0.4],
+                 [0.2, 0.3, 0.3, 0.2],
+                 [0.3, 0.4, 0.2, 0.1],
+                 [0.4, 0.3, 0.2, 0.1],
+                 [0.1, 0.1, 0.4, 0.4],
+                 [0.2, 0.2, 0.3, 0.3],
+             ],
+             "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+             "rush_yards": [3, 2, 0, 1, 2, 4],
+         },
+     )
+
+     scorer = ThresholdEventScorer(
+         dist_column="dist",
+         threshold_column="ydstogo",
+         outcome_column="rush_yards",
+         labels=[0, 1, 2, 3],
+         compare_to_naive=True,
+         naive_granularity=["team"],
+         granularity=["qtr"],
+     )
+
+     result = scorer.score(df)
+
+     assert isinstance(result, dict)
+     assert len(result) == 2
+     assert (1,) in result
+     assert (2,) in result
+     assert all(isinstance(v, float) for v in result.values())
+
+
+ @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+ def test_threshold_event_scorer__multi_column_granularity_with_compare_to_naive(df_type):
+     """ThresholdEventScorer with multi-column granularity and compare_to_naive."""
+     df = create_dataframe(
+         df_type,
+         {
+             "qtr": [1, 1, 2, 2],
+             "half": [1, 1, 2, 2],
+             "dist": [
+                 [0.1, 0.2, 0.3, 0.4],
+                 [0.2, 0.3, 0.3, 0.2],
+                 [0.4, 0.3, 0.2, 0.1],
+                 [0.1, 0.1, 0.4, 0.4],
+             ],
+             "ydstogo": [2.0, 3.0, 2.0, 1.0],
+             "rush_yards": [3, 2, 1, 2],
+         },
+     )
+
+     scorer = ThresholdEventScorer(
+         dist_column="dist",
+         threshold_column="ydstogo",
+         outcome_column="rush_yards",
+         labels=[0, 1, 2, 3],
+         compare_to_naive=True,
+         granularity=["qtr", "half"],
+     )
+
+     result = scorer.score(df)
+
+     assert isinstance(result, dict)
+     assert len(result) == 2
+     assert all(isinstance(v, float) for v in result.values())
+
+
 @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
 def test_all_scorers_handle_all_nan_targets(df_type):
     """All scorers handle case where all targets are NaN"""
@@ -1795,3 +2048,93 @@ def test_all_scorers_handle_all_nan_targets(df_type):
             assert np.isnan(score) or score == 0.0
         except (ValueError, IndexError):
             pass
+ SCORER_VALIDATION_CASES = [
+     pytest.param(
+         lambda: MeanBiasScorer(pred_column="pred", target="target", validation_column="is_validation"),
+         lambda: pd.DataFrame(
+             {
+                 "pred": [2.0, 0.0],
+                 "target": [1.0, 2.0],
+                 "is_validation": [1, 0],
+             }
+         ),
+         id="mean_bias",
+     ),
+     pytest.param(
+         lambda: PWMSE(pred_column="pred", target="target", labels=[0, 1], validation_column="is_validation"),
+         lambda: pd.DataFrame(
+             {
+                 "pred": [[0.7, 0.3], [0.4, 0.6]],
+                 "target": [0, 1],
+                 "is_validation": [1, 0],
+             }
+         ),
+         id="pwmse",
+     ),
+     pytest.param(
+         lambda: SklearnScorer(
+             scorer_function=mean_absolute_error, pred_column="pred", target="target", validation_column="is_validation"
+         ),
+         lambda: pd.DataFrame(
+             {
+                 "pred": [1.0, 0.0],
+                 "target": [1.0, 0.0],
+                 "is_validation": [1, 0],
+             }
+         ),
+         id="sklearn",
+     ),
+     pytest.param(
+         lambda: ProbabilisticMeanBias(
+             pred_column="pred", target="target", class_column_name="classes", validation_column="is_validation"
+         ),
+         lambda: pd.DataFrame(
+             {
+                 "pred": [[0.2, 0.8], [0.6, 0.4]],
+                 "target": [1, 0],
+                 "classes": [[0, 1], [0, 1]],
+                 "is_validation": [1, 0],
+             }
+         ),
+         id="probabilistic_mean_bias",
+     ),
+     pytest.param(
+         lambda: OrdinalLossScorer(pred_column="pred", target="target", classes=[0, 1], validation_column="is_validation"),
+         lambda: pd.DataFrame(
+             {
+                 "pred": [[0.2, 0.8], [0.6, 0.4]],
+                 "target": [1, 0],
+                 "is_validation": [1, 0],
+             }
+         ),
+         id="ordinal_loss",
+     ),
+     pytest.param(
+         lambda: ThresholdEventScorer(
+             dist_column="dist",
+             threshold_column="threshold",
+             outcome_column="outcome",
+             comparator=Operator.GREATER_THAN_OR_EQUALS,
+             validation_column="is_validation",
+         ),
+         lambda: pd.DataFrame(
+             {
+                 "dist": [[0.2, 0.8], [0.6, 0.4], [0.3, 0.7]],
+                 "threshold": [0.5, 0.2, 0.3],
+                 "outcome": [1, 0, 1],
+                 "is_validation": [1, 1, 0],
+             }
+         ),
+         id="threshold_event",
+     ),
+ ]
+
+
+ @pytest.mark.parametrize("scorer_factory, df_factory", SCORER_VALIDATION_CASES)
+ def test_scorers_respect_validation_column(scorer_factory, df_factory):
+     """Scorers should filter on validation_column when specified."""
+     df = df_factory()
+     df_valid = df[df["is_validation"] == 1]
+     score_all = scorer_factory().score(df)
+     score_valid = scorer_factory().score(df_valid)
+     assert score_all == score_valid
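Every case above encodes the same invariant. Under the assumption that each scorer simply drops non-validation rows before scoring, the asserted equality can be restated with a hypothetical pandas helper:

def filter_validation_rows(df, validation_column="is_validation"):
    # Keep only the rows flagged as validation data; scoring the full frame should
    # then match scoring this subset.
    return df[df[validation_column] == 1]

# score(df) == score(filter_validation_rows(df)) is exactly what the test asserts.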
@@ -16,6 +16,49 @@ def column_names():
     )
 
 
+ class PolarsOnlyGenerator:
+     def __init__(self):
+         self._features_out = ["polars_only_feature"]
+
+     @property
+     def features_out(self):
+         return self._features_out
+
+     def fit_transform(self, df, column_names=None):
+         if not isinstance(df, pl.DataFrame):
+             raise TypeError("Expected polars DataFrame")
+         return df.with_columns((pl.col("points") * 2).alias("polars_only_feature"))
+
+     def transform(self, df):
+         if not isinstance(df, pl.DataFrame):
+             raise TypeError("Expected polars DataFrame")
+         return df.with_columns((pl.col("points") * 2).alias("polars_only_feature"))
+
+     def future_transform(self, df):
+         return self.transform(df)
+
+
+ def test_feature_generator_pipeline__passes_native_polars_to_custom_generator(column_names):
+     data = pl.DataFrame(
+         {
+             "game_id": [1, 1],
+             "team_id": ["A", "B"],
+             "player_id": ["p1", "p2"],
+             "date": pd.to_datetime(["2023-01-01", "2023-01-01"]),
+             "points": [10, 15],
+         }
+     )
+
+     pipeline = FeatureGeneratorPipeline(
+         feature_generators=[PolarsOnlyGenerator()],
+         column_names=column_names,
+     )
+
+     result = pipeline.fit_transform(data, column_names=column_names)
+
+     assert "polars_only_feature" in result.columns
+
+
 @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
 def test_feature_generator_pipeline__fit_transform_preserves_row_count(df_type, column_names):
     """FeatureGeneratorPipeline.fit_transform should preserve row count."""