spforge 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spforge/scorer/_score.py +38 -3
- {spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/METADATA +1 -1
- {spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/RECORD +7 -7
- tests/scorer/test_score.py +253 -0
- {spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/WHEEL +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/licenses/LICENSE +0 -0
- {spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/top_level.txt +0 -0
spforge/scorer/_score.py CHANGED

@@ -350,6 +350,7 @@ class PWMSE(BaseScorer):
         labels: list[int] | None = None,
         compare_to_naive: bool = False,
         naive_granularity: list[str] | None = None,
+        evaluation_labels: list[int] | None = None,
     ):
         self.pred_column_name = pred_column
         super().__init__(
@@ -363,12 +364,39 @@ class PWMSE(BaseScorer):
             naive_granularity=naive_granularity,
         )
         self.labels = labels
+        self.evaluation_labels = evaluation_labels
+
+        self._eval_indices: list[int] | None = None
+        if self.evaluation_labels is not None and self.labels is not None:
+            label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
+            self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
+
+    def _slice_and_renormalize(self, preds: np.ndarray) -> np.ndarray:
+        if self._eval_indices is None:
+            return preds
+        sliced = preds[:, self._eval_indices]
+        row_sums = sliced.sum(axis=1, keepdims=True)
+        row_sums = np.where(row_sums == 0, 1.0, row_sums)
+        return sliced / row_sums
+
+    def _get_scoring_labels(self) -> list[int]:
+        if self.evaluation_labels is not None:
+            return self.evaluation_labels
+        return self.labels
 
     def _pwmse_score(self, targets: np.ndarray, preds: np.ndarray) -> float:
-        labels = np.asarray(self.labels, dtype=np.float64)
+        labels = np.asarray(self._get_scoring_labels(), dtype=np.float64)
         diffs_sqd = (labels[None, :] - targets[:, None]) ** 2
         return float((diffs_sqd * preds).sum(axis=1).mean())
 
+    def _filter_targets_for_evaluation(self, df: IntoFrameT) -> IntoFrameT:
+        if self.evaluation_labels is None:
+            return df
+        eval_set = set(self.evaluation_labels)
+        min_eval, max_eval = min(eval_set), max(eval_set)
+        target_col = nw.col(self.target)
+        return df.filter((target_col >= min_eval) & (target_col <= max_eval))
+
     @narwhals.narwhalify
     def score(self, df: IntoFrameT) -> float | dict[tuple, float]:
         df = apply_filters(df, self.filters)
@@ -386,6 +414,9 @@ class PWMSE(BaseScorer):
             after,
         )
 
+        # Filter targets outside evaluation_labels range
+        df = self._filter_targets_for_evaluation(df)
+
         if self.aggregation_level:
             first_pred = df[self.pred_column].to_list()[0] if len(df) > 0 else None
             if isinstance(first_pred, (list, np.ndarray)):
@@ -415,12 +446,13 @@ class PWMSE(BaseScorer):
 
             targets = gran_df[self.target].to_numpy().astype(np.float64)
             preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
+            preds = self._slice_and_renormalize(preds)
             score = self._pwmse_score(targets, preds)
             if self.compare_to_naive:
                 naive_probs_list = _naive_probability_predictions_for_df(
                     gran_df,
                     self.target,
-                    list(self.labels) if self.labels else None,
+                    list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
                     self.naive_granularity,
                 )
                 naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
@@ -432,12 +464,13 @@ class PWMSE(BaseScorer):
 
         targets = df[self.target].to_numpy().astype(np.float64)
         preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
+        preds = self._slice_and_renormalize(preds)
        score = self._pwmse_score(targets, preds)
        if self.compare_to_naive:
            naive_probs_list = _naive_probability_predictions_for_df(
                df,
                self.target,
-                list(self.labels) if self.labels else None,
+                list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
                self.naive_granularity,
            )
            naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
@@ -1358,4 +1391,6 @@ class ThresholdEventScorer(BaseScorer):
                 df, self.outcome_column, labels, self.naive_granularity
             )
             naive_score = self._score_with_probabilities(df, naive_list)
+            if isinstance(score, dict) and isinstance(naive_score, dict):
+                return {k: naive_score[k] - score[k] for k in score.keys()}
             return float(naive_score - score)
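Taken together, the change above adds an evaluation_labels option to PWMSE: predictions over the full label set are sliced down to the evaluation labels, renormalized, and scored only against targets inside the evaluation range, while ThresholdEventScorer gains per-group naive comparison when granularity is set. A minimal usage sketch of the new option follows, mirroring the tests added in this release; the `from spforge.scorer import PWMSE` import path is an assumption inferred from the package layout, not something this diff confirms.

import polars as pl

from spforge.scorer import PWMSE  # assumed public import path

df = pl.DataFrame(
    {
        # Each "pred" row is a probability distribution over the full label set
        "pred": [[0.1, 0.2, 0.4, 0.2, 0.1], [0.05, 0.15, 0.5, 0.2, 0.1]],
        "target": [0, 1],
    }
)

scorer = PWMSE(
    pred_column="pred",
    target="target",
    labels=[-2, -1, 0, 1, 2],
    evaluation_labels=[-1, 0, 1],  # score only the inner labels
)

# Predictions are sliced to the evaluation labels and renormalized, and rows
# whose target falls outside [min(evaluation_labels), max(evaluation_labels)]
# are dropped before the pairwise MSE is computed.
score = scorer.score(df)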
{spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spforge
-Version: 0.8.2
+Version: 0.8.4
 Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
 Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
 License: See LICENSE file
{spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/RECORD CHANGED

@@ -61,7 +61,7 @@ spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH
 spforge/ratings/team_start_rating_generator.py,sha256=ZJe84sTvE4Yep3d4wKJMMJn2Q4PhcCwkO7Wyd5nsYUA,5110
 spforge/ratings/utils.py,sha256=qms5J5SD-FyXDR2G8giDMbu_AoLgI135pjW4nghxROg,3940
 spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
-spforge/scorer/_score.py,sha256=
+spforge/scorer/_score.py,sha256=TR0T9nJj0aeVgGfOE0fZmXlO66CELulYwxhi7ZAxhvY,56184
 spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
 spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
 spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -70,7 +70,7 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
 spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
 spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
 spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
-spforge-0.8.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+spforge-0.8.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
 tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
 tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
 tests/test_feature_generator_pipeline.py,sha256=CAgBknWqawqYi5_hxcPmpxrLVa5elMHVv1VrSVRKXEA,17705
@@ -93,7 +93,7 @@ tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7
 tests/ratings/test_player_rating_generator.py,sha256=3mjqlX159QqOlBoY3r_TFkvLwpE4zlLE0fiqpbfk3ps,58547
 tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
 tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
-tests/scorer/test_score.py,sha256=
+tests/scorer/test_score.py,sha256=KTrGJypQEpU8tmgJ6LU8wK1SRC3PLUXFzZIyiA-UY7U,71749
 tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
 tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
 tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -101,7 +101,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
 tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
 tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
 tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
-spforge-0.8.2.dist-info/METADATA,sha256=
-spforge-0.8.2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-spforge-0.8.2.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
-spforge-0.8.2.dist-info/RECORD,,
+spforge-0.8.4.dist-info/METADATA,sha256=XNaD0lL_puuuYmZU59VjenOYpLSRCSx_nswef8yCZ4M,20219
+spforge-0.8.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+spforge-0.8.4.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
+spforge-0.8.4.dist-info/RECORD,,
tests/scorer/test_score.py CHANGED

@@ -372,6 +372,136 @@ def test_pwmse_compare_to_naive_granularity(df_type):
     assert abs(score - expected) < 1e-10
 
 
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_slices_predictions(df_type):
+    """PWMSE with evaluation_labels should only score on specified labels."""
+    # Predictions have 5 labels: [-2, -1, 0, 1, 2]
+    # But we only want to evaluate on inner labels: [-1, 0, 1]
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],  # Full distribution over 5 labels
+                [0.05, 0.15, 0.5, 0.2, 0.1],
+            ],
+            "target": [0, 1],
+        },
+    )
+
+    # Score with all labels
+    scorer_full = PWMSE(pred_column="pred", target="target", labels=[-2, -1, 0, 1, 2])
+    score_full = scorer_full.score(df)
+
+    # Score with evaluation_labels excluding boundaries
+    scorer_eval = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+    )
+    score_eval = scorer_eval.score(df)
+
+    # Scores should be different because evaluation_labels excludes boundary penalties
+    assert score_full != score_eval
+
+    # Manual calculation for evaluation_labels case:
+    # Slice predictions to indices 1, 2, 3 (corresponding to labels -1, 0, 1)
+    # Then renormalize
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1], [0.05, 0.15, 0.5, 0.2, 0.1]])
+    preds_sliced = preds_full[:, 1:4]  # [-1, 0, 1]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([0, 1], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score_eval - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_with_compare_to_naive(df_type):
+    """PWMSE evaluation_labels should also affect naive baseline calculation."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+            ],
+            "target": [-1, 0, 0, 1],  # Targets within evaluation range
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+        compare_to_naive=True,
+    )
+    score = scorer.score(df)
+
+    # Naive should be computed using only evaluation_labels
+    # With targets [-1, 0, 0, 1], naive probs are [1/4, 2/4, 1/4] for labels [-1, 0, 1]
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([-1, 0, 0, 1], dtype=np.float64)
+
+    # Model predictions sliced and renormalized
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]] * 4)
+    preds_sliced = preds_full[:, 1:4]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    model_score = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    # Naive predictions for evaluation_labels only
+    naive_probs = np.array([0.25, 0.5, 0.25])  # Based on target distribution
+    naive_preds = np.tile(naive_probs, (4, 1))
+    naive_score = float((diffs_sqd * naive_preds).sum(axis=1).mean())
+
+    expected = naive_score - model_score
+    assert abs(score - expected) < 1e-10
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_pwmse__evaluation_labels_filters_targets_outside_range(df_type):
+    """PWMSE should filter out targets outside evaluation_labels range."""
+    df = create_dataframe(
+        df_type,
+        {
+            "pred": [
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+                [0.1, 0.2, 0.4, 0.2, 0.1],
+            ],
+            "target": [-2, 0, 2],  # -2 and 2 are outside evaluation range [-1, 0, 1]
+        },
+    )
+
+    scorer = PWMSE(
+        pred_column="pred",
+        target="target",
+        labels=[-2, -1, 0, 1, 2],
+        evaluation_labels=[-1, 0, 1],
+    )
+    score = scorer.score(df)
+
+    # Should only use the row with target=0
+    preds_full = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]])
+    preds_sliced = preds_full[:, 1:4]
+    preds_renorm = preds_sliced / preds_sliced.sum(axis=1, keepdims=True)
+
+    eval_labels = np.array([-1, 0, 1], dtype=np.float64)
+    targets = np.array([0], dtype=np.float64)
+    diffs_sqd = (eval_labels[None, :] - targets[:, None]) ** 2
+    expected = float((diffs_sqd * preds_renorm).sum(axis=1).mean())
+
+    assert abs(score - expected) < 1e-10
+
+
 # ============================================================================
 # D. MeanBiasScorer Tests
 # ============================================================================
@@ -1762,6 +1892,129 @@ def test_pwmse__accepts_ndarray_predictions(df_type):
     assert score >= 0
 
 
+# ============================================================================
+# ThresholdEventScorer with granularity and compare_to_naive Tests
+# ============================================================================
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__granularity_with_compare_to_naive(df_type):
+    """ThresholdEventScorer fails when combining compare_to_naive with granularity.
+
+    Bug: When granularity is set, binary_scorer.score() returns a dict, but
+    the naive comparison tries to do dict - dict which fails with:
+    'unsupported operand type(s) for -: 'dict' and 'dict''
+    """
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 1, 2, 2, 2],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.3, 0.4, 0.2, 0.1],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+                [0.2, 0.2, 0.3, 0.3],
+            ],
+            "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+            "rush_yards": [3, 2, 0, 1, 2, 4],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        granularity=["qtr"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert (1,) in result
+    assert (2,) in result
+    assert all(isinstance(v, float) for v in result.values())
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__granularity_with_compare_to_naive_and_naive_granularity(df_type):
+    """ThresholdEventScorer with both granularity and naive_granularity."""
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 1, 2, 2, 2],
+            "team": ["A", "A", "B", "A", "B", "B"],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.3, 0.4, 0.2, 0.1],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+                [0.2, 0.2, 0.3, 0.3],
+            ],
+            "ydstogo": [2.0, 3.0, 1.0, 2.0, 1.0, 3.0],
+            "rush_yards": [3, 2, 0, 1, 2, 4],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        naive_granularity=["team"],
+        granularity=["qtr"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert (1,) in result
+    assert (2,) in result
+    assert all(isinstance(v, float) for v in result.values())
+
+
+@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
+def test_threshold_event_scorer__multi_column_granularity_with_compare_to_naive(df_type):
+    """ThresholdEventScorer with multi-column granularity and compare_to_naive."""
+    df = create_dataframe(
+        df_type,
+        {
+            "qtr": [1, 1, 2, 2],
+            "half": [1, 1, 2, 2],
+            "dist": [
+                [0.1, 0.2, 0.3, 0.4],
+                [0.2, 0.3, 0.3, 0.2],
+                [0.4, 0.3, 0.2, 0.1],
+                [0.1, 0.1, 0.4, 0.4],
+            ],
+            "ydstogo": [2.0, 3.0, 2.0, 1.0],
+            "rush_yards": [3, 2, 1, 2],
+        },
+    )
+
+    scorer = ThresholdEventScorer(
+        dist_column="dist",
+        threshold_column="ydstogo",
+        outcome_column="rush_yards",
+        labels=[0, 1, 2, 3],
+        compare_to_naive=True,
+        granularity=["qtr", "half"],
+    )
+
+    result = scorer.score(df)
+
+    assert isinstance(result, dict)
+    assert len(result) == 2
+    assert all(isinstance(v, float) for v in result.values())
+
+
 @pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
 def test_all_scorers_handle_all_nan_targets(df_type):
     """All scorers handle case where all targets are NaN"""
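As a standalone sanity check of the arithmetic the new tests assert, the numpy-only sketch below reproduces the slice-and-renormalize step by hand, using the single surviving row from the target-filtering test above; it has no spforge dependency.

import numpy as np

preds = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]])  # distribution over [-2, -1, 0, 1, 2]
eval_indices = [1, 2, 3]                       # positions of evaluation_labels [-1, 0, 1]

sliced = preds[:, eval_indices]                      # [[0.2, 0.4, 0.2]]
renorm = sliced / sliced.sum(axis=1, keepdims=True)  # [[0.25, 0.5, 0.25]]

labels = np.array([-1.0, 0.0, 1.0])
targets = np.array([0.0])  # the only row kept after target filtering
diffs_sqd = (labels[None, :] - targets[:, None]) ** 2   # [[1.0, 0.0, 1.0]]
pwmse = float((diffs_sqd * renorm).sum(axis=1).mean())  # 0.25 + 0.25 = 0.5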
{spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/WHEEL
File without changes

{spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/licenses/LICENSE
File without changes

{spforge-0.8.2.dist-info → spforge-0.8.4.dist-info}/top_level.txt
File without changes