spforge 0.8.27__py3-none-any.whl → 0.8.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -4,7 +4,12 @@ import polars as pl
 import pytest
 
 from spforge import ColumnNames
-from spforge.data_structures import PlayerRating, RatingState
+from spforge.data_structures import (
+    MatchPerformance,
+    PlayerRating,
+    PreMatchPlayerRating,
+    PreMatchTeamRating,
+)
 from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures, RatingUnknownFeatures
 
 
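For orientation, here is a minimal sketch of how the newly imported data structures compose. It is assembled purely from the constructor calls visible in the tests in this diff; the field names and shapes come from the diff itself, not from separate spforge documentation:

from spforge.data_structures import (
    MatchPerformance,
    PreMatchPlayerRating,
    PreMatchTeamRating,
)

# A player's observed involvement in a single match
perf = MatchPerformance(
    performance_value=0.5,
    participation_weight=1.0,
    projected_participation_weight=1.0,
)

# Pre-match snapshot of one player's rating state
player = PreMatchPlayerRating(
    id="P1",
    rating_value=1000.0,
    games_played=1,
    league=None,
    position=None,
    match_performance=perf,
)

# Pre-match snapshot of a team, aggregating its players
team = PreMatchTeamRating(id="T1", players=[player], rating_value=1000.0)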
@@ -138,6 +143,60 @@ def test_fit_transform_participation_weight_scaling(base_cn):
     assert half_rating > 0
 
 
+def test_player_rating_generator_team_rating_coef_affects_predictor(base_cn):
+    """Passing a team rating coefficient should affect the predictor used by the generator."""
+    generator = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="difference",
+        team_rating_diff_coef=0.5,
+        rating_diff_coef=0.0,
+        rating_diff_team_from_entity_coef=0.0,
+    )
+
+    predictor = generator._performance_predictor
+    match_perf = MatchPerformance(
+        performance_value=0.5,
+        participation_weight=1.0,
+        projected_participation_weight=1.0,
+    )
+    player_rating = PreMatchPlayerRating(
+        id="P1",
+        rating_value=100.0,
+        games_played=1,
+        league=None,
+        position=None,
+        match_performance=match_perf,
+    )
+    opponent_team_rating = PreMatchTeamRating(id="Opp", players=[], rating_value=100.0)
+    high_team_rating = PreMatchTeamRating(
+        id="TeamHigh",
+        players=[player_rating],
+        rating_value=110.0,
+    )
+    low_team_rating = PreMatchTeamRating(
+        id="TeamLow",
+        players=[player_rating],
+        rating_value=90.0,
+    )
+
+    high_pred = predictor.predict_performance(
+        player_rating=player_rating,
+        opponent_team_rating=opponent_team_rating,
+        team_rating=high_team_rating,
+    )
+    low_pred = predictor.predict_performance(
+        player_rating=player_rating,
+        opponent_team_rating=opponent_team_rating,
+        team_rating=low_team_rating,
+    )
+
+    assert predictor.team_rating_diff_coef == 0.5
+    assert high_pred > low_pred
+    assert high_pred > 0.5
+    assert low_pred < 0.5
+
+
 def test_fit_transform_batch_update_logic(base_cn):
     """Test that ratings do not update between matches if update_match_id is the same."""
     from dataclasses import replace
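The new test above pins down the intended behavior of the "difference" predictor: with player and opponent ratings equal, a positive team_rating_diff_coef alone pushes the prediction above 0.5 when the player's team is stronger than the opponent and below 0.5 when it is weaker. A minimal sketch of a predictor with that shape — purely illustrative, not spforge's actual formula — could look like this:

import math

def predict_performance_sketch(
    player_rating: float,
    team_rating: float,
    opponent_rating: float,
    rating_diff_coef: float = 0.0,
    team_rating_diff_coef: float = 0.0,
    rating_diff_team_from_entity_coef: float = 0.0,
) -> float:
    # Hypothetical: combine the three rating differences the test's
    # coefficients name, then squash into (0, 1) with a sigmoid.
    x = (
        rating_diff_coef * (player_rating - opponent_rating)
        + team_rating_diff_coef * (team_rating - opponent_rating)
        + rating_diff_team_from_entity_coef * (player_rating - team_rating)
    )
    return 1.0 / (1.0 + math.exp(-x))

# Mirrors the test setup: only team_rating_diff_coef is non-zero
high = predict_performance_sketch(100.0, 110.0, 100.0, team_rating_diff_coef=0.5)
low = predict_performance_sketch(100.0, 90.0, 100.0, team_rating_diff_coef=0.5)
assert high > 0.5 > low

Any predictor with this monotone-in-team-rating shape would satisfy the assertions; the test constrains behavior, not the exact functional form.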
@@ -2352,3 +2411,413 @@ def test_fit_transform_backward_compatible_without_playing_time_columns(base_cn)
     # Ratings should be updated normally
     assert gen._player_off_ratings["P1"].rating_value != 1000.0
     assert gen._player_off_ratings["P3"].rating_value > gen._player_off_ratings["P4"].rating_value
+
+
+def test_fit_transform_ignore_opponent_predictor_adapts_to_performance_drift(base_cn):
+    """
+    Test that PlayerRatingNonOpponentPerformancePredictor converges to actual
+    performance with fixed reference (not stuck at 0.5 like rolling average).
+
+    With pre-scaled data (mean=0.48 ≠ 0.5), predictions should converge to 0.48,
+    not stay stuck at 0.5. This verifies the fixed reference allows convergence.
+    """
+    import numpy as np
+
+    np.random.seed(42)  # Reproducible test
+    n_matches = 1500
+    n_players_per_team = 5
+    n_teams = 2
+
+    # Target mean intentionally NOT 0.5 to test convergence
+    target_mean = 0.48
+
+    data = {
+        "pid": [],
+        "tid": [],
+        "mid": [],
+        "dt": [],
+        "perf": [],
+        "pw": [],
+    }
+
+    match_id = 0
+    for i in range(n_matches // 2):
+        date = datetime(2019, 1, 1) + timedelta(days=i * 2)
+        date_str = date.strftime("%Y-%m-%d")
+
+        # Generate performance data already in [0,1] with mean at target
+        # Small std to keep values tightly around target mean
+        for team_idx in range(n_teams):
+            team_id = f"T{team_idx + 1}"
+            for player_idx in range(n_players_per_team):
+                player_id = f"P{team_idx}_{player_idx}"
+                # Draw from normal distribution, clip to [0,1]
+                perf = np.random.normal(target_mean, 0.08)
+                perf = max(0.0, min(1.0, perf))
+
+                data["pid"].append(player_id)
+                data["tid"].append(team_id)
+                data["mid"].append(f"M{match_id}")
+                data["dt"].append(date_str)
+                data["perf"].append(perf)
+                data["pw"].append(1.0)
+
+        match_id += 1
+
+    df = pl.DataFrame(data)
+
+    # Verify input data has mean ≠ 0.5 (before any scaling)
+    input_mean = sum(data["perf"]) / len(data["perf"])
+    assert abs(input_mean - target_mean) < 0.01, f"Input data mean should be ~{target_mean}"
+
+    # Use ignore_opponent predictor with fixed reference
+    # CRITICAL: auto_scale_performance=False to preserve the input mean
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=False,  # Keep input mean at 0.48
+        start_harcoded_start_rating=1000.0,
+        rating_change_multiplier_offense=100,  # Faster convergence for test
+        rating_change_multiplier_defense=100,
+        non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_PERFORMANCE],
+    )
+
+    result = gen.fit_transform(df)
+
+    # Check tail (after convergence period)
+    tail_start_match = (n_matches // 2) - 200
+    tail_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) >= tail_start_match
+    )
+
+    tail_actual = tail_df["perf"].to_list()
+    tail_preds = tail_df["player_predicted_performance_perf"].to_list()
+
+    mean_actual = sum(tail_actual) / len(tail_actual)
+    mean_pred = sum(tail_preds) / len(tail_preds)
+
+    # With fixed reference, predictions should converge close to actual mean
+    deviation = abs(mean_pred - mean_actual)
+    assert deviation < 0.015, (
+        f"Mean predicted performance {mean_pred:.4f} deviates from "
+        f"actual mean {mean_actual:.4f} by {deviation:.4f}. "
+        f"With fixed reference, predictions should converge to actual performance mean."
+    )
+
+    # Verify we're not stuck at 0.5 (the original rolling average bug)
+    assert abs(mean_pred - 0.5) > 0.01, (
+        f"Mean predicted performance {mean_pred:.4f} is too close to 0.5. "
+        f"System appears stuck at sigmoid midpoint (original rolling average bug)."
+    )
+
+
+def test_fit_transform_ignore_opponent_with_autoscale_and_temporal_drift(base_cn):
+    """
+    Test that fixed reference works with auto_scale_performance=True and temporal drift.
+
+    With balanced data (overall mean=0.5) and temporal drift (early=0.505, late=0.495):
+    - Auto_scale preserves overall mean at 0.5
+    - Predictions track the SCALED values (not raw 0.505/0.495)
+    - Drift is preserved (early predictions > late predictions)
+    """
+    import numpy as np
+
+    np.random.seed(42)
+    n_matches = 1000
+    n_players_per_team = 5
+
+    data = {
+        "pid": [],
+        "tid": [],
+        "mid": [],
+        "dt": [],
+        "perf": [],
+        "pw": [],
+    }
+
+    match_id = 0
+    for i in range(n_matches // 2):
+        date = datetime(2019, 1, 1) + timedelta(days=i * 2)
+        date_str = date.strftime("%Y-%m-%d")
+
+        # Temporal drift: 0.505 -> 0.495 (overall mean = 0.5)
+        progress = i / (n_matches // 2)
+        period_mean = 0.505 - (0.01 * progress)
+
+        for team_idx in range(2):
+            team_id = f"T{team_idx + 1}"
+            for player_idx in range(n_players_per_team):
+                player_id = f"P{team_idx}_{player_idx}"
+                # Add variance around period mean
+                perf = np.random.normal(period_mean, 0.03)
+                perf = max(0.0, min(1.0, perf))
+
+                data["pid"].append(player_id)
+                data["tid"].append(team_id)
+                data["mid"].append(f"M{match_id}")
+                data["dt"].append(date_str)
+                data["perf"].append(perf)
+                data["pw"].append(1.0)
+
+        match_id += 1
+
+    df = pl.DataFrame(data)
+
+    # Verify raw data is balanced
+    raw_mean = sum(data["perf"]) / len(data["perf"])
+    assert abs(raw_mean - 0.5) < 0.01, f"Raw data should have mean ≈ 0.5, got {raw_mean}"
+
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,  # ← Key: with auto_scale
+        start_harcoded_start_rating=1000.0,
+        rating_change_multiplier_offense=100,
+        rating_change_multiplier_defense=100,
+        non_predictor_features_out=[RatingUnknownFeatures.PLAYER_PREDICTED_PERFORMANCE],
+    )
+
+    result = gen.fit_transform(df)
+
+    # Check that auto_scale created the performance column
+    assert "performance__perf" in result.columns
+
+    # Get overall scaled mean
+    all_scaled = result["performance__perf"].to_list()
+    overall_scaled_mean = sum(all_scaled) / len(all_scaled)
+
+    # Verify overall scaled mean ≈ 0.5 (auto_scale preserves balance)
+    assert abs(overall_scaled_mean - 0.5) < 0.01, (
+        f"Auto_scale should preserve overall mean at 0.5, got {overall_scaled_mean}"
+    )
+
+    # Get early and late periods
+    early_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) < 100
+    )
+    late_df = result.filter(
+        pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) >= (n_matches // 2 - 100)
+    )
+
+    early_actual_scaled = early_df["performance__perf"].to_list()
+    early_preds = early_df["player_predicted_performance_perf"].to_list()
+    late_actual_scaled = late_df["performance__perf"].to_list()
+    late_preds = late_df["player_predicted_performance_perf"].to_list()
+
+    early_actual_mean = sum(early_actual_scaled) / len(early_actual_scaled)
+    early_pred_mean = sum(early_preds) / len(early_preds)
+    late_actual_mean = sum(late_actual_scaled) / len(late_actual_scaled)
+    late_pred_mean = sum(late_preds) / len(late_preds)
+
+    # Verify drift is preserved after scaling (strict bounds based on 0.505→0.495 drift)
+    assert early_actual_mean > 0.51, (
+        f"Early period should be > 0.51 after scaling, got {early_actual_mean:.4f}"
+    )
+    assert late_actual_mean < 0.49, (
+        f"Late period should be < 0.49 after scaling, got {late_actual_mean:.4f}"
+    )
+
+    # Verify drift magnitude is significant
+    drift_magnitude = early_actual_mean - late_actual_mean
+    assert drift_magnitude > 0.02, (
+        f"Drift magnitude should be > 0.02, got {drift_magnitude:.4f}"
+    )
+
+    # Verify predictions track the SCALED values (not raw 0.505/0.495)
+    # Tolerance: 0.025 accounts for convergence lag with temporal drift
+    early_deviation = abs(early_pred_mean - early_actual_mean)
+    late_deviation = abs(late_pred_mean - late_actual_mean)
+
+    assert early_deviation < 0.025, (
+        f"Early predictions should converge to scaled actual ({early_actual_mean:.4f}), "
+        f"got {early_pred_mean:.4f}, deviation={early_deviation:.4f}"
+    )
+    assert late_deviation < 0.025, (
+        f"Late predictions should converge to scaled actual ({late_actual_mean:.4f}), "
+        f"got {late_pred_mean:.4f}, deviation={late_deviation:.4f}"
+    )
+
+    # Verify drift is tracked in predictions
+    assert early_pred_mean > late_pred_mean, (
+        f"Predictions should track temporal drift: early ({early_pred_mean:.4f}) > late ({late_pred_mean:.4f})"
+    )
+
+
+def test_ignore_opponent_predictor_reference_rating_set_correctly(base_cn):
+    """
+    Test that PlayerRatingNonOpponentPerformancePredictor._reference_rating
+    is set correctly from start rating parameters.
+    """
+    # Test 1: With hardcoded start rating
+    gen1 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_harcoded_start_rating=1100.0,
+    )
+    assert gen1._performance_predictor._reference_rating == 1100.0, (
+        f"Expected reference rating 1100.0, got {gen1._performance_predictor._reference_rating}"
+    )
+
+    # Test 2: Without hardcoded start (should default to 1000)
+    gen2 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+    )
+    assert gen2._performance_predictor._reference_rating == 1000.0, (
+        f"Expected reference rating 1000.0, got {gen2._performance_predictor._reference_rating}"
+    )
+
+    # Test 3: With league ratings (single league)
+    gen3 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_league_ratings={"NBA": 1150},
+    )
+    assert gen3._performance_predictor._reference_rating == 1150.0, (
+        f"Expected reference rating 1150.0, got {gen3._performance_predictor._reference_rating}"
+    )
+
+    # Test 4: With multiple league ratings (should use mean)
+    gen4 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_league_ratings={"NBA": 1100, "G-League": 900, "EuroLeague": 1000},
+    )
+    expected_mean = (1100 + 900 + 1000) / 3
+    assert gen4._performance_predictor._reference_rating == expected_mean, (
+        f"Expected reference rating {expected_mean}, got {gen4._performance_predictor._reference_rating}"
+    )
+
+    # Test 5: Hardcoded start rating takes precedence over league ratings
+    gen5 = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        performance_predictor="ignore_opponent",
+        auto_scale_performance=True,
+        start_harcoded_start_rating=1200.0,
+        start_league_ratings={"NBA": 1100},
+    )
+    assert gen5._performance_predictor._reference_rating == 1200.0, (
+        f"Expected hardcoded start rating 1200.0 to take precedence, got {gen5._performance_predictor._reference_rating}"
+    )
+
+
+def test_separate_offense_defense_participation_weights(base_cn):
+    """Test that offense and defense use separate participation weights.
+
+    When participation_weight represents offensive activity (e.g., shots attempted),
+    using it for both offense and defense updates creates bias. This test verifies
+    that defense_participation_weight is used for defensive rating updates.
+    """
+    from dataclasses import replace
+
+    cn = replace(
+        base_cn,
+        participation_weight="shots_attempted",
+        defense_participation_weight="minutes",
+    )
+
+    # Create a scenario where a high-volume shooter (many shots) faces a low-volume shooter
+    # The high-volume shooter should have larger offensive updates but equal defensive updates
+    df = pl.DataFrame(
+        {
+            "pid": ["P1", "P2", "P3", "P4"],
+            "tid": ["T1", "T1", "T2", "T2"],
+            "mid": ["M1", "M1", "M1", "M1"],
+            "dt": ["2024-01-01"] * 4,
+            "perf": [0.6, 0.4, 0.5, 0.5],  # Varying performance values
+            "shots_attempted": [10.0, 10.0, 10.0, 10.0],  # Same offensive activity
+            "minutes": [30.0, 30.0, 30.0, 30.0],  # Same defensive activity
+        }
+    )
+
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=cn,
+        auto_scale_performance=True,
+        rating_change_multiplier_offense=50,
+        rating_change_multiplier_defense=50,
+    )
+
+    result = gen.fit_transform(df)
+
+    # Verify that the defense_participation_weight column is present in the data
+    assert "minutes" in df.columns
+
+    # All players performed equally (0.5) with equal participation weights,
+    # so ratings should be symmetric
+    assert "P1" in gen._player_off_ratings
+    assert "P1" in gen._player_def_ratings
+
+    # Now test with different participation weights for offense vs defense
+    df2 = pl.DataFrame(
+        {
+            "pid": ["P1", "P2", "P3", "P4"],
+            "tid": ["T1", "T1", "T2", "T2"],
+            "mid": ["M2", "M2", "M2", "M2"],
+            "dt": ["2024-01-02"] * 4,
+            "perf": [0.6, 0.4, 0.5, 0.5],
+            "shots_attempted": [20.0, 5.0, 10.0, 10.0],  # P1 shoots much more
+            "minutes": [30.0, 30.0, 30.0, 30.0],  # But all play same minutes
+        }
+    )
+
+    result2 = gen.fit_transform(df2)
+
+    # P1 should have larger offensive rating changes due to high shots_attempted
+    # but equal defensive rating changes due to equal minutes played
+    p1_off = gen._player_off_ratings["P1"]
+    p2_off = gen._player_off_ratings["P2"]
+    p1_def = gen._player_def_ratings["P1"]
+    p2_def = gen._player_def_ratings["P2"]
+
+    # Both players have same games_played count for defense
+    assert p1_def.games_played == p2_def.games_played
+
+    # Verify that ratings were updated
+    assert p1_off.games_played > 0
+    assert p2_off.games_played > 0
+
+
+@pytest.mark.parametrize("library", ["polars", "pandas"])
+def test_defense_participation_weight_backwards_compatibility(base_cn, library):
+    """Test that when defense_participation_weight is not set, it defaults to participation_weight."""
+    import pandas as pd
+
+    df_data = {
+        "pid": ["P1", "P2", "P3", "P4"],
+        "tid": ["T1", "T1", "T2", "T2"],
+        "mid": ["M1", "M1", "M1", "M1"],
+        "dt": ["2024-01-01"] * 4,
+        "perf": [0.6, 0.4, 0.5, 0.5],
+        "pw": [1.0, 0.5, 0.8, 0.8],
+    }
+
+    if library == "polars":
+        df = pl.DataFrame(df_data)
+    else:
+        df = pd.DataFrame(df_data)
+
+    # When defense_participation_weight is None, it should default to participation_weight
+    gen = PlayerRatingGenerator(
+        performance_column="perf",
+        column_names=base_cn,
+        auto_scale_performance=True,
+    )
+
+    result = gen.fit_transform(df)
+
+    # Should work without errors
+    assert result is not None
+    assert len(gen._player_off_ratings) > 0
+    assert len(gen._player_def_ratings) > 0
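
The drift tests above slice matches by index with a polars idiom worth seeing in isolation: the numeric part of a match id like "M42" is pulled out with str.extract (capture group 1), cast to an integer, and compared. A self-contained sketch, runnable with only polars installed:

import polars as pl

df = pl.DataFrame({"mid": [f"M{i}" for i in range(10)], "perf": [0.5] * 10})

# Extract the digits after "M", cast to Int32, and keep the tail of the sequence
tail = df.filter(
    pl.col("mid").cast(pl.Utf8).str.extract(r"M(\d+)", 1).cast(pl.Int32) >= 7
)
assert tail["mid"].to_list() == ["M7", "M8", "M9"]

This works because the tests generate match ids in chronological order, so the extracted index doubles as a time axis for defining "early" and "late" periods.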