spforge 0.8.2__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/lol/pipeline_transformer_example.py +69 -86
- examples/nba/cross_validation_example.py +4 -11
- examples/nba/feature_engineering_example.py +33 -15
- examples/nba/game_winner_example.py +24 -14
- examples/nba/predictor_transformers_example.py +29 -16
- spforge/__init__.py +1 -0
- spforge/features_generator_pipeline.py +8 -4
- spforge/hyperparameter_tuning/__init__.py +12 -0
- spforge/hyperparameter_tuning/_default_search_spaces.py +159 -1
- spforge/hyperparameter_tuning/_tuner.py +192 -0
- spforge/ratings/__init__.py +4 -0
- spforge/ratings/_player_rating.py +11 -0
- spforge/ratings/league_start_rating_optimizer.py +201 -0
- spforge/scorer/_score.py +38 -3
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/METADATA +12 -19
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/RECORD +26 -22
- tests/end_to_end/test_estimator_hyperparameter_tuning.py +85 -0
- tests/end_to_end/test_league_start_rating_optimizer.py +117 -0
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +5 -0
- tests/hyperparameter_tuning/test_estimator_tuner.py +167 -0
- tests/ratings/test_player_rating_generator.py +27 -0
- tests/scorer/test_score.py +343 -0
- tests/test_feature_generator_pipeline.py +43 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/WHEEL +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/top_level.txt +0 -0
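The headline addition in 0.8.8 is the hyperparameter-tuning module (EstimatorHyperparameterTuner, ParamSpec, and default search spaces). The sketch below is assembled only from the imports and calls exercised by the new tests/hyperparameter_tuning/test_estimator_tuner.py shown later in this diff; the toy data, column names, trial count, and the tuned parameter (LogisticRegression's C) are illustrative assumptions, not part of the package.

import pandas as pd
from sklearn.linear_model import LogisticRegression

from spforge import EstimatorHyperparameterTuner, ParamSpec
from spforge.cross_validator import MatchKFoldCrossValidator
from spforge.estimator import SkLearnEnhancerEstimator
from spforge.scorer import MeanBiasScorer

# Toy match-level data: two rows per match id, alternating labels (mirrors the test fixture).
df = pd.DataFrame(
    {
        "mid": [f"M{i // 2}" for i in range(12)],
        "date": pd.date_range("2024-01-01", periods=12, freq="D"),
        "x1": [float(i) for i in range(12)],
        "y": [1 if i % 2 == 0 else 0 for i in range(12)],
    }
)

estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
cv = MatchKFoldCrossValidator(
    match_id_column_name="mid",
    date_column_name="date",
    target_column="y",
    estimator=estimator,
    prediction_column_name="y_pred",
    n_splits=2,
    features=["x1"],
)
tuner = EstimatorHyperparameterTuner(
    estimator=estimator,
    cross_validator=cv,
    scorer=MeanBiasScorer(pred_column="y_pred", target="y", validation_column="is_validation"),
    direction="minimize",
    # Without param_search_space, optimize() raises unless the estimator has built-in
    # defaults (the tests only show defaults for LGBM-style estimators).
    param_search_space={"C": ParamSpec(param_type="float", low=0.1, high=2.0, log=True)},
    n_trials=2,
    show_progress_bar=False,
)

result = tuner.optimize(df)
print(result.best_params)  # e.g. contains "estimator__C" per the custom-search-space test
print(result.best_value)   # float: the cross-validated score of the best trial

Note the "estimator__" prefix on best_params when the estimator is wrapped in SkLearnEnhancerEstimator; the LGBM-defaults test asserts an unprefixed "n_estimators", so the prefix appears to come from the wrapper rather than the tuner itself.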
tests/hyperparameter_tuning/test_estimator_tuner.py
ADDED
@@ -0,0 +1,167 @@
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.base import BaseEstimator
+from sklearn.linear_model import LogisticRegression
+
+from spforge import EstimatorHyperparameterTuner, ParamSpec
+from spforge.cross_validator import MatchKFoldCrossValidator
+from spforge.estimator import SkLearnEnhancerEstimator
+from spforge.scorer import MeanBiasScorer
+
+
+class FakeLGBMClassifier(BaseEstimator):
+    __module__ = "lightgbm.sklearn"
+
+    def __init__(
+        self,
+        n_estimators: int = 100,
+        num_leaves: int = 31,
+        max_depth: int = 5,
+        min_child_samples: int = 20,
+        subsample: float = 1.0,
+        subsample_freq: int = 1,
+        reg_alpha: float = 0.0,
+        reg_lambda: float = 0.0,
+    ):
+        self.n_estimators = n_estimators
+        self.num_leaves = num_leaves
+        self.max_depth = max_depth
+        self.min_child_samples = min_child_samples
+        self.subsample = subsample
+        self.subsample_freq = subsample_freq
+        self.reg_alpha = reg_alpha
+        self.reg_lambda = reg_lambda
+
+    def fit(self, X, y):
+        self.classes_ = np.unique(y)
+        return self
+
+    def predict_proba(self, X):
+        n = len(X)
+        if len(self.classes_) < 2:
+            return np.ones((n, 1))
+        return np.tile([0.4, 0.6], (n, 1))
+
+    def predict(self, X):
+        n = len(X)
+        if len(self.classes_) == 1:
+            return np.full(n, self.classes_[0])
+        proba = self.predict_proba(X)
+        idx = np.argmax(proba, axis=1)
+        return np.array(self.classes_)[idx]
+
+
+@pytest.fixture
+def sample_df():
+    dates = pd.date_range("2024-01-01", periods=12, freq="D")
+    rows = []
+    for i, date in enumerate(dates):
+        rows.append(
+            {
+                "mid": f"M{i // 2}",
+                "date": date,
+                "x1": float(i),
+                "y": 1 if i % 2 == 0 else 0,
+            }
+        )
+    return pd.DataFrame(rows)
+
+
+@pytest.fixture
+def scorer():
+    return MeanBiasScorer(
+        pred_column="y_pred",
+        target="y",
+        validation_column="is_validation",
+    )
+
+
+def test_estimator_tuner_requires_search_space(sample_df, scorer):
+    estimator = LogisticRegression()
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    with pytest.raises(ValueError, match="param_search_space is required"):
+        tuner.optimize(sample_df)
+
+
+def test_estimator_tuner_custom_search_space(sample_df, scorer):
+    estimator = SkLearnEnhancerEstimator(estimator=LogisticRegression())
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        param_search_space={
+            "C": ParamSpec(
+                param_type="float",
+                low=0.1,
+                high=2.0,
+                log=True,
+            )
+        },
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    result = tuner.optimize(sample_df)
+
+    assert "estimator__C" in result.best_params
+    assert isinstance(result.best_value, float)
+
+
+def test_estimator_tuner_lgbm_defaults(sample_df, scorer):
+    estimator = FakeLGBMClassifier()
+
+    cv = MatchKFoldCrossValidator(
+        match_id_column_name="mid",
+        date_column_name="date",
+        target_column="y",
+        estimator=estimator,
+        prediction_column_name="y_pred",
+        n_splits=2,
+        features=["x1"],
+    )
+
+    tuner = EstimatorHyperparameterTuner(
+        estimator=estimator,
+        cross_validator=cv,
+        scorer=scorer,
+        direction="minimize",
+        n_trials=2,
+        show_progress_bar=False,
+    )
+
+    result = tuner.optimize(sample_df)
+
+    assert "n_estimators" in result.best_params
+    assert isinstance(result.best_value, float)
tests/ratings/test_player_rating_generator.py
CHANGED
@@ -1662,3 +1662,30 @@ def test_player_rating_team_with_strong_offense_and_weak_defense_gets_expected_r
 
     assert a_off > start_rating
     assert a_def < start_rating
+
+
+def test_fit_transform__player_rating_difference_from_team_projected_feature(base_cn, sample_df):
+    """PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED computes player_off_rating - team_off_rating_projected."""
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        auto_scale_performance=True,
+        features_out=[
+            RatingKnownFeatures.PLAYER_RATING_DIFFERENCE_FROM_TEAM_PROJECTED,
+            RatingKnownFeatures.PLAYER_OFF_RATING,
+            RatingKnownFeatures.TEAM_OFF_RATING_PROJECTED,
+        ],
+    )
+    result = gen.fit_transform(sample_df)
+
+    diff_col = "player_rating_difference_from_team_projected_perf"
+    player_col = "player_off_rating_perf"
+    team_col = "team_off_rating_projected_perf"
+
+    assert diff_col in result.columns
+    assert player_col in result.columns
+    assert team_col in result.columns
+
+    for row in result.iter_rows(named=True):
+        expected = row[player_col] - row[team_col]
+        assert row[diff_col] == pytest.approx(expected, rel=1e-9)
tests/scorer/test_score.py
CHANGED
@@ -372,6 +372,136 @@ def test_pwmse_compare_to_naive_granularity(df_type):
     assert abs(score - expected) < 1e-10
 
 
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_slices_predictions(df_type):
+    """PWMSE with evaluation_labels should only score on specified labels."""
+    # Predictions have 5 labels: [-2, -1, 0, 1, 2]
+    # But we only want to evaluate on inner labels: [-1, 0, 1]
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],  # Full distribution over 5 labels
+                [0.05, 0.15, 0.5, 0.2, 0.1],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    # Score with all labels
+    scorer_full = PWMSE(pred_column="pred", target="target", labels=[-2, -1, 0, 1, 2])
+    score_full = scorer_full.score(df)
+
+    # Score with evaluation_labels excluding boundaries
+    scorer_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+    )
+    score_eval = scorer_eval.score(df)
+
+    # Scores should be different because evaluation_labels excludes boundary penalties
+    assert score_full != score_eval
+
+    # Manual calculation for evaluation_labels case:
+    # Slice predictions to indices 1, 2, 3 (corresponding to labels -1, 0, 1)
+    # Then renormalize
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1], [0.05, 0.15, 0.5, 0.2, 0.1]])
+    preds_sliced = preds_full[:, 1:4]  # [-1, 0, 1]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([0, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score_eval - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_with_compare_to_naive(df_type):
+    """PWMSE evaluation_labels should also affect naive baseline calculation."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+            ],
+            "target": [-1, 0, 0, 1],  # Targets within evaluation range
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+        compare_to_naive=True,
+    )
+    score = scorer.score(df)
+
+    # Naive should be computed using only evaluation_labels
+    # With targets [-1, 0, 0, 1], naive probs are [1/4, 2/4, 1/4] for labels [-1, 0, 1]
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([-1, 0, 0, 1], dtype=np.float64)
+
+    # Model predictions sliced and renormalized
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]] * 4)
+    preds_sliced = preds_full[:, 1:4]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    model_score = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    # Naive predictions for evaluation_labels only
+    naive_probs = np.array([0.25, 0.5, 0.25])  # Based on target distribution
+    naive_preds = np.tile(naive_probs, (4, 1))
+    naive_score = float((diffs_sqd * naive_preds).sum(axis=1).mean())
+
+    expected = naive_score - model_score
+    assert abs(score - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_filters_targets_outside_range(df_type):
+    """PWMSE should filter out targets outside evaluation_labels range."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+            ],
+            "target": [-2, 0, 2],  # -2 and 2 are outside evaluation range [-1, 0, 1]
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+    )
+    score = scorer.score(df)
+
+    # Should only use the row with target=0
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]])
+    preds_sliced = preds_full[:, 1:4]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([0], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score - expected) < 1e-10
+
+
 # ============================================================================
 # D. MeanBiasScorer Tests
 # ============================================================================
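The manual expected-value math repeated in the three tests above all follows the same probability-weighted MSE rule. The helper below is an editor's reference sketch that mirrors those calculations (it is not spforge's implementation; NaN handling, validation-column filtering, and the compare_to_naive wiring are not reproduced here):

import numpy as np


def pwmse_reference(preds, targets, labels, evaluation_labels=None):
    """Probability-weighted MSE: mean over rows of sum_j p_ij * (label_j - target_i) ** 2."""
    preds = np.asarray(preds, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.float64)
    if evaluation_labels is not None:
        eval_labels = np.asarray(evaluation_labels, dtype=np.float64)
        # Drop rows whose target falls outside the evaluation labels.
        keep = np.isin(targets, eval_labels)
        preds, targets = preds[keep], targets[keep]
        # Slice predictions down to the evaluation labels and renormalize.
        cols = [int(np.flatnonzero(labels == lab)[0]) for lab in eval_labels]
        preds = preds[:, cols]
        preds = preds / preds.sum(axis=1, keepdims=True)
        labels = eval_labels
    diffs_sqd = (labels[None, :] - targets[:, None]) ** 2
    return float((diffs_sqd * preds).sum(axis=1).mean())


# Reproduces the expected value computed by hand in test_pwmse__evaluation_labels_slices_predictions:
pwmse_reference(
    preds=[[0.1, 0.2, 0.4, 0.2, 0.1], [0.05, 0.15, 0.5, 0.2, 0.1]],
    targets=[0, 1],
    labels=[-2, -1, 0, 1, 2],
    evaluation_labels=[-1, 0, 1],
)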
@@ -1762,6 +1892,129 @@ def test_pwmse__accepts_ndarray_predictions(df_type):
     assert score >= 0
 
 
+# ============================================================================
+# ThresholdEventScorer with granularity and compare_to_naive Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__granularity_with_compare_to_naive(df_type):
+    """ThresholdEventScorer fails when combining compare_to_naive with granularity.
+
+    Bug: When granularity is set, binary_scorer.score() returns a dict, but
+    the naive comparison tries to do dict - dict which fails with:
+    'unsupported operand type(s) for -: 'dict' and 'dict''
+    """
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 1, 2, 2, 2],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.3, 0.4, 0.2, 0.1],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+                [0.2, 0.2, 0.3, 0.3],
+            ],
+            "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+            "rush_yards": [3, 2, 0, 1, 2, 4],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        granularity=["qtr"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert (1,) in result
+    assert (2,) in result
+    assert all(isinstance(v, float) for v in result.values())
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__granularity_with_compare_to_naive_and_naive_granularity(df_type):
+    """ThresholdEventScorer with both granularity and naive_granularity."""
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 1, 2, 2, 2],
+            "team": ["A", "A", "B", "A", "B", "B"],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.3, 0.4, 0.2, 0.1],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+                [0.2, 0.2, 0.3, 0.3],
+            ],
+            "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+            "rush_yards": [3, 2, 0, 1, 2, 4],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        naive_granularity=["team"],
+        granularity=["qtr"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert (1,) in result
+    assert (2,) in result
+    assert all(isinstance(v, float) for v in result.values())
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__multi_column_granularity_with_compare_to_naive(df_type):
+    """ThresholdEventScorer with multi-column granularity and compare_to_naive."""
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 2, 2],
+            "half": [1, 1, 2, 2],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+            ],
+            "ydstogo": [2.0, 3.0, 2.0, 1.0],
+            "rush_yards": [3, 2, 1, 2],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        granularity=["qtr", "half"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert all(isinstance(v, float) for v in result.values())
+
+
 @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
 def test_all_scorers_handle_all_nan_targets(df_type):
     """All scorers handle case where all targets are NaN"""
@@ -1795,3 +2048,93 @@ def test_all_scorers_handle_all_nan_targets(df_type):
             assert np.isnan(score) or score == 0.0
         except (ValueError, IndexError):
             pass
+SCORER_VALIDATION_CASES = [
+    pytest.param(
+        lambda: MeanBiasScorer(pred_column="pred", target="target", validation_column="is_validation"),
+        lambda: pd.DataFrame(
+            {
+                "pred": [2.0, 0.0],
+                "target": [1.0, 2.0],
+                "is_validation": [1, 0],
+            }
+        ),
+        id="mean_bias",
+    ),
+    pytest.param(
+        lambda: PWMSE(pred_column="pred", target="target", labels=[0, 1], validation_column="is_validation"),
+        lambda: pd.DataFrame(
+            {
+                "pred": [[0.7, 0.3], [0.4, 0.6]],
+                "target": [0, 1],
+                "is_validation": [1, 0],
+            }
+        ),
+        id="pwmse",
+    ),
+    pytest.param(
+        lambda: SklearnScorer(
+            scorer_function=mean_absolute_error, pred_column="pred", target="target", validation_column="is_validation"
+        ),
+        lambda: pd.DataFrame(
+            {
+                "pred": [1.0, 0.0],
+                "target": [1.0, 0.0],
+                "is_validation": [1, 0],
+            }
+        ),
+        id="sklearn",
+    ),
+    pytest.param(
+        lambda: ProbabilisticMeanBias(
+            pred_column="pred", target="target", class_column_name="classes", validation_column="is_validation"
+        ),
+        lambda: pd.DataFrame(
+            {
+                "pred": [[0.2, 0.8], [0.6, 0.4]],
+                "target": [1, 0],
+                "classes": [[0, 1], [0, 1]],
+                "is_validation": [1, 0],
+            }
+        ),
+        id="probabilistic_mean_bias",
+    ),
+    pytest.param(
+        lambda: OrdinalLossScorer(pred_column="pred", target="target", classes=[0, 1], validation_column="is_validation"),
+        lambda: pd.DataFrame(
+            {
+                "pred": [[0.2, 0.8], [0.6, 0.4]],
+                "target": [1, 0],
+                "is_validation": [1, 0],
+            }
+        ),
+        id="ordinal_loss",
+    ),
+    pytest.param(
+        lambda: ThresholdEventScorer(
+            dist_column="dist",
+            threshold_column="threshold",
+            outcome_column="outcome",
+            comparator=Operator.GREATER_THAN_OR_EQUALS,
+            validation_column="is_validation",
+        ),
+        lambda: pd.DataFrame(
+            {
+                "dist": [[0.2, 0.8], [0.6, 0.4], [0.3, 0.7]],
+                "threshold": [0.5, 0.2, 0.3],
+                "outcome": [1, 0, 1],
+                "is_validation": [1, 1, 0],
+            }
+        ),
+        id="threshold_event",
+    ),
+]
+
+
+@pytest.mark.parametrize("scorer_factory, df_factory", SCORER_VALIDATION_CASES)
+def test_scorers_respect_validation_column(scorer_factory, df_factory):
+    """Scorers should filter on validation_column when specified."""
+    df = df_factory()
+    df_valid = df[df["is_validation"] == 1]
+    score_all = scorer_factory().score(df)
+    score_valid = scorer_factory().score(df_valid)
+    assert score_all == score_valid
tests/test_feature_generator_pipeline.py
CHANGED
@@ -16,6 +16,49 @@ def column_names():
     )
 
 
+class PolarsOnlyGenerator:
+    def __init__(self):
+        self._features_out = ["polars_only_feature"]
+
+    @property
+    def features_out(self):
+        return self._features_out
+
+    def fit_transform(self, df, column_names=None):
+        if not isinstance(df, pl.DataFrame):
+            raise TypeError("Expected polars DataFrame")
+        return df.with_columns((pl.col("points") * 2).alias("polars_only_feature"))
+
+    def transform(self, df):
+        if not isinstance(df, pl.DataFrame):
+            raise TypeError("Expected polars DataFrame")
+        return df.with_columns((pl.col("points") * 2).alias("polars_only_feature"))
+
+    def future_transform(self, df):
+        return self.transform(df)
+
+
+def test_feature_generator_pipeline__passes_native_polars_to_custom_generator(column_names):
+    data = pl.DataFrame(
+        {
+            "game_id": [1, 1],
+            "team_id": ["A", "B"],
+            "player_id": ["p1", "p2"],
+            "date": pd.to_datetime(["2023-01-01", "2023-01-01"]),
+            "points": [10, 15],
+        }
+    )
+
+    pipeline = FeatureGeneratorPipeline(
+        feature_generators=[PolarsOnlyGenerator()],
+        column_names=column_names,
+    )
+
+    result = pipeline.fit_transform(data, column_names=column_names)
+
+    assert "polars_only_feature" in result.columns
+
+
 @pytest.mark.parametrize("df_type", [pd.DataFrame, pl.DataFrame])
 def test_feature_generator_pipeline__fit_transform_preserves_row_count(df_type, column_names):
     """FeatureGeneratorPipeline.fit_transform should preserve row count."""
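Beyond the regression it covers, PolarsOnlyGenerator above implicitly documents the duck-typed contract FeatureGeneratorPipeline relies on for custom generators. The Protocol below is an editor's sketch of that inferred contract, not an interface declared by spforge in this diff; the optional column_names argument and the exact return types are assumptions based on the test.

from typing import Optional, Protocol

import polars as pl


class InferredFeatureGenerator(Protocol):
    """Shape of a custom generator accepted by FeatureGeneratorPipeline, as exercised above."""

    @property
    def features_out(self) -> list[str]: ...

    def fit_transform(self, df: pl.DataFrame, column_names: Optional[object] = None) -> pl.DataFrame: ...

    def transform(self, df: pl.DataFrame) -> pl.DataFrame: ...

    def future_transform(self, df: pl.DataFrame) -> pl.DataFrame: ...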
{spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/WHEEL
File without changes
{spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/licenses/LICENSE
File without changes
{spforge-0.8.2.dist-info → spforge-0.8.8.dist-info}/top_level.txt
File without changes