spforge 0.8.2__py3-none-any.whl → 0.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spforge/scorer/_score.py CHANGED
@@ -350,6 +350,7 @@ class PWMSE(BaseScorer):
350
350
  labels: list[int] | None = None,
351
351
  compare_to_naive: bool = False,
352
352
  naive_granularity: list[str] | None = None,
353
+ evaluation_labels: list[int] | None = None,
353
354
  ):
354
355
  self.pred_column_name = pred_column
355
356
  super().__init__(
@@ -363,12 +364,39 @@ class PWMSE(BaseScorer):
363
364
  naive_granularity=naive_granularity,
364
365
  )
365
366
  self.labels = labels
367
+ self.evaluation_labels = evaluation_labels
368
+
369
+ self._eval_indices: list[int] | None = None
370
+ if self.evaluation_labels is not None and self.labels is not None:
371
+ label_to_idx = {lbl: i for i, lbl in enumerate(self.labels)}
372
+ self._eval_indices = [label_to_idx[lbl] for lbl in self.evaluation_labels]
373
+
374
+ def _slice_and_renormalize(self, preds: np.ndarray) -> np.ndarray:
375
+ if self._eval_indices is None:
376
+ return preds
377
+ sliced = preds[:, self._eval_indices]
378
+ row_sums = sliced.sum(axis=1, keepdims=True)
379
+ row_sums = np.where(row_sums == 0, 1.0, row_sums)
380
+ return sliced / row_sums
381
+
382
+ def _get_scoring_labels(self) -> list[int]:
383
+ if self.evaluation_labels is not None:
384
+ return self.evaluation_labels
385
+ return self.labels
366
386
 
367
387
  def _pwmse_score(self, targets: np.ndarray, preds: np.ndarray) -> float:
368
- labels = np.asarray(self.labels, dtype=np.float64)
388
+ labels = np.asarray(self._get_scoring_labels(), dtype=np.float64)
369
389
  diffs_sqd = (labels[None, :] - targets[:, None]) ** 2
370
390
  return float((diffs_sqd * preds).sum(axis=1).mean())
371
391
 
392
+ def _filter_targets_for_evaluation(self, df: IntoFrameT) -> IntoFrameT:
393
+ if self.evaluation_labels is None:
394
+ return df
395
+ eval_set = set(self.evaluation_labels)
396
+ min_eval, max_eval = min(eval_set), max(eval_set)
397
+ target_col = nw.col(self.target)
398
+ return df.filter((target_col >= min_eval) & (target_col <= max_eval))
399
+
372
400
  @narwhals.narwhalify
373
401
  def score(self, df: IntoFrameT) -> float | dict[tuple, float]:
374
402
  df = apply_filters(df, self.filters)
@@ -386,6 +414,9 @@ class PWMSE(BaseScorer):
386
414
  after,
387
415
  )
388
416
 
417
+ # Filter targets outside evaluation_labels range
418
+ df = self._filter_targets_for_evaluation(df)
419
+
389
420
  if self.aggregation_level:
390
421
  first_pred = df[self.pred_column].to_list()[0] if len(df) > 0 else None
391
422
  if isinstance(first_pred, (list, np.ndarray)):
@@ -415,12 +446,13 @@ class PWMSE(BaseScorer):
415
446
 
416
447
  targets = gran_df[self.target].to_numpy().astype(np.float64)
417
448
  preds = np.asarray(gran_df[self.pred_column].to_list(), dtype=np.float64)
449
+ preds = self._slice_and_renormalize(preds)
418
450
  score = self._pwmse_score(targets, preds)
419
451
  if self.compare_to_naive:
420
452
  naive_probs_list = _naive_probability_predictions_for_df(
421
453
  gran_df,
422
454
  self.target,
423
- list(self.labels) if self.labels else None,
455
+ list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
424
456
  self.naive_granularity,
425
457
  )
426
458
  naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
@@ -432,12 +464,13 @@ class PWMSE(BaseScorer):
432
464
 
433
465
  targets = df[self.target].to_numpy().astype(np.float64)
434
466
  preds = np.asarray(df[self.pred_column].to_list(), dtype=np.float64)
467
+ preds = self._slice_and_renormalize(preds)
435
468
  score = self._pwmse_score(targets, preds)
436
469
  if self.compare_to_naive:
437
470
  naive_probs_list = _naive_probability_predictions_for_df(
438
471
  df,
439
472
  self.target,
440
- list(self.labels) if self.labels else None,
473
+ list(self._get_scoring_labels()) if self._get_scoring_labels() else None,
441
474
  self.naive_granularity,
442
475
  )
443
476
  naive_preds = np.asarray(naive_probs_list, dtype=np.float64)
@@ -1358,4 +1391,6 @@ class ThresholdEventScorer(BaseScorer):
1358
1391
  df, self.outcome_column, labels, self.naive_granularity
1359
1392
  )
1360
1393
  naive_score = self._score_with_probabilities(df, naive_list)
1394
+ if isinstance(score, dict) and isinstance(naive_score, dict):
1395
+ return {k: naive_score[k] - score[k] for k in score.keys()}
1361
1396
  return float(naive_score - score)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spforge
3
- Version: 0.8.2
3
+ Version: 0.8.8
4
4
  Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
5
5
  Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
6
6
  License: See LICENSE file
@@ -17,7 +17,7 @@ Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: numpy>=1.23.4
19
19
  Requires-Dist: optuna>=3.4.0
20
- Requires-Dist: pandas>=2.0.0
20
+ Requires-Dist: pandas<3.0.0,>=2.0.0
21
21
  Requires-Dist: pendulum>=1.0.0
22
22
  Requires-Dist: scikit-learn>=1.4.0
23
23
  Requires-Dist: lightgbm>=4.0.0
@@ -85,12 +85,12 @@ This example demonstrates predicting NBA game winners using player-level ratings
85
85
  import pandas as pd
86
86
  from sklearn.linear_model import LogisticRegression
87
87
 
88
+ from examples import get_sub_sample_nba_data
88
89
  from spforge.autopipeline import AutoPipeline
89
90
  from spforge.data_structures import ColumnNames
90
- from spforge.ratings import RatingKnownFeatures
91
- from spforge.ratings._player_rating import PlayerRatingGenerator
91
+ from spforge.ratings import PlayerRatingGenerator, RatingKnownFeatures
92
92
 
93
- df = pd.read_parquet("data/game_player_subsample.parquet")
93
+ df = get_sub_sample_nba_data(as_pandas=True, as_polars=False)
94
94
 
95
95
  # Step 1: Define column mappings for your dataset
96
96
  column_names = ColumnNames(
@@ -144,7 +144,7 @@ historical_df = rating_generator.fit_transform(historical_df)
144
144
  pipeline = AutoPipeline(
145
145
  estimator=LogisticRegression(),
146
146
  granularity=["game_id", "team_id"], # Aggregate players → teams
147
- feature_names=rating_generator.features_out + ["location"], # Rating + home/away
147
+ estimator_features=rating_generator.features_out + ["location"], # Rating + home/away
148
148
  )
149
149
 
150
150
  # Train on historical data
@@ -302,8 +302,8 @@ cross_validator = MatchKFoldCrossValidator(
302
302
  prediction_column_name="points_pred",
303
303
  target_column="points",
304
304
  n_splits=3, # Number of temporal folds
305
- # Must include both feature_names AND context_feature_names
306
- features=pipeline.feature_names + pipeline.context_feature_names,
305
+ # Must include both estimator features and context features
306
+ features=pipeline.required_features,
307
307
  )
308
308
 
309
309
  # Generate validation predictions
@@ -330,7 +330,7 @@ print(f"Validation MAE: {mae:.2f}")
330
330
  - `is_validation=1` marks validation rows, `is_validation=0` marks training rows
331
331
  - Use `validation_column` in scorer to score only validation rows
332
332
  - Training data always comes BEFORE validation data chronologically
333
- - Must pass both `feature_names` + `context_feature_names` to `features` parameter
333
+ - Must pass all required features (use `pipeline.required_features`)
334
334
  - Scorers can filter rows (e.g., only score players who played minutes > 0)
335
335
 
336
336
  See [examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py) for a complete example.
@@ -371,7 +371,7 @@ from lightgbm import LGBMClassifier, LGBMRegressor
371
371
  # Approach 1: LGBMClassifier (direct probability prediction)
372
372
  pipeline_classifier = AutoPipeline(
373
373
  estimator=LGBMClassifier(verbose=-100, random_state=42),
374
- feature_names=features_pipeline.features_out,
374
+ estimator_features=features_pipeline.features_out,
375
375
  )
376
376
 
377
377
  # Approach 2: LGBMRegressor + NegativeBinomialEstimator
@@ -385,13 +385,7 @@ distribution_estimator = NegativeBinomialEstimator(
385
385
 
386
386
  pipeline_negbin = AutoPipeline(
387
387
  estimator=distribution_estimator,
388
- feature_names=features_pipeline.features_out,
389
- context_feature_names=[
390
- column_names.player_id,
391
- column_names.start_date,
392
- column_names.team_id,
393
- column_names.match_id,
394
- ],
388
+ estimator_features=features_pipeline.features_out,
395
389
  predictor_transformers=[
396
390
  EstimatorTransformer(
397
391
  prediction_column_name="points_estimate",
@@ -439,7 +433,7 @@ points_estimate_transformer = EstimatorTransformer(
439
433
  # Stage 2: Refine estimate using Stage 1 output
440
434
  player_points_pipeline = AutoPipeline(
441
435
  estimator=LGBMRegressor(verbose=-100, n_estimators=50),
442
- feature_names=features_pipeline.features_out, # Original features
436
+ estimator_features=features_pipeline.features_out, # Original features
443
437
  # predictor_transformers execute first, adding their predictions
444
438
  predictor_transformers=[points_estimate_transformer],
445
439
  )
@@ -474,4 +468,3 @@ For complete, runnable examples with detailed explanations:
474
468
  - **[examples/nba/cross_validation_example.py](examples/nba/cross_validation_example.py)** - Time-series CV, distributions, and scoring
475
469
  - **[examples/nba/predictor_transformers_example.py](examples/nba/predictor_transformers_example.py)** - Multi-stage hierarchical modeling
476
470
  - **[examples/nba/game_winner_example.py](examples/nba/game_winner_example.py)** - Basic workflow for game winner prediction
477
-
@@ -1,23 +1,23 @@
1
1
  examples/__init__.py,sha256=qGLpphvrjQj0-zS9vP0Q07L-anDnmw7gFZJUEBgYG3U,158
2
2
  examples/game_level_example.py,sha256=EOr-H0K79O3Zah4wWuqa5DLmT2iZGbfgxD-xSU2-dfI,2244
3
3
  examples/lol/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- examples/lol/pipeline_transformer_example.py,sha256=HousFjE2dbJgdONur4PxwhW2SGQIJGI8aZUIb4TEvIo,4317
4
+ examples/lol/pipeline_transformer_example.py,sha256=XVmm6Xya5z7JyOA0s-DISOlR2I1wpUthCyhRSt9n6qE,3402
5
5
  examples/lol/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  examples/lol/data/subsample_lol_data.parquet,sha256=tl04XDslylECJUV1e0DGeqMb6D0Uh6_48NO6TykdgQI,343549
7
7
  examples/lol/data/utils.py,sha256=Lt3XNNa5cavvFXHaTQ-GOPxSuWmPEfEO0CVXQEyF_s0,486
8
8
  examples/nba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- examples/nba/cross_validation_example.py,sha256=WD_52vO9m1rILVfXXf8uIb_odpaK-TZ4iOewHU19lTg,5281
10
- examples/nba/feature_engineering_example.py,sha256=0OHJ2w6vkHvFB2bYwIQQb8HjFA5bfXc7tLmngnahG74,7708
11
- examples/nba/game_winner_example.py,sha256=RNKYSwpArr08yDWOtkxjx7eAldf97WYDBBfb3tsVSZc,2975
12
- examples/nba/predictor_transformers_example.py,sha256=mPXRVPx4J5VZtxYH89k7pwh7_EGZ0CXoNHeh2s0AOp8,8499
9
+ examples/nba/cross_validation_example.py,sha256=XVnQJ5mqMou9z83ML5J0wS3gk-pa56sdvahJYQgZ8os,5056
10
+ examples/nba/feature_engineering_example.py,sha256=BDd5594Yi_56lGDqz3SYQkwT8NVZyFkgv3gKPCsAjz4,8197
11
+ examples/nba/game_winner_example.py,sha256=7VVHxGyU2uPjT9q6lDMHJ5KpkWp9gU8brxr_UZfuSHg,3189
12
+ examples/nba/predictor_transformers_example.py,sha256=Fl4BY_hVW0iYERolN6s-ZB2xv-UxOK547L6iI5t0r0Y,8807
13
13
  examples/nba/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  examples/nba/data/game_player_subsample.parquet,sha256=ODJxHC-mUYbJ7r-ScUFtPU7hrFuxLUbbDSobmpCkw0w,279161
15
15
  examples/nba/data/utils.py,sha256=41hxLQ1d6ZgBEcHa5MI0-fG5KbsRi07cclMPQZM95ek,509
16
- spforge/__init__.py,sha256=5d9zzBxaaXj2JeBNwfUwuV7Ll5FERHyXONsFiuKhHSQ,402
16
+ spforge/__init__.py,sha256=8vZhy7XUpzqWkVKpXqwqOLDkQlNytRhyf4qjwObfXgU,468
17
17
  spforge/autopipeline.py,sha256=ZUwv6Q6O8cD0u5TiSqG6lhW0j16RlSb160AzuOeL2R8,23186
18
18
  spforge/base_feature_generator.py,sha256=RbD00N6oLCQQcEb_VF5wbwZztl-X8k9B0Wlaj9Os1iU,668
19
19
  spforge/data_structures.py,sha256=k82v5r79vl0_FAVvsxVF9Nbzb5FoHqVrlHZlEXGc5gQ,7298
20
- spforge/features_generator_pipeline.py,sha256=KRLv03QNgXH4DvPEgpin8g0TQEelDik3Ynec12E1o20,8016
20
+ spforge/features_generator_pipeline.py,sha256=n8vzZKqXNFcFRDWZhllnkhAh5NFXdOD3FEIOpHcay8E,8208
21
21
  spforge/utils.py,sha256=2RlivUtMX5wQWpFVUyFfexDJE0wV6uZ4dnNzvoDmVhI,2644
22
22
  spforge/cross_validator/__init__.py,sha256=1QHgTFIZ73EZ_MgJlUKimxdUmB7MFaOEy6jsUs6V0T0,134
23
23
  spforge/cross_validator/_base.py,sha256=-zxZ2Q2tYlGIwjQQMf9_OglS_doppp47gVElkJuBY7E,1199
@@ -43,25 +43,26 @@ spforge/feature_generator/_rolling_mean_binary.py,sha256=lmODy-o9Dd9pb8IlA7g4UyA
43
43
  spforge/feature_generator/_rolling_mean_days.py,sha256=EZQmFmYVQB-JjZV5k8bOWnaTxNpPDCZAjdfdhiiG4r4,8415
44
44
  spforge/feature_generator/_rolling_window.py,sha256=HT8LezsRIPNAlMEoP9oTPW2bKFu55ZSRnQZGST7fncw,8836
45
45
  spforge/feature_generator/_utils.py,sha256=KDn33ia1OYJTK8THFpvc_uRiH_Bl3fImGqqbfzs0YA4,9654
46
- spforge/hyperparameter_tuning/__init__.py,sha256=pp7aWzydObRawFLcGiaUrUduEQIjln2uif9nKCTk6l4,509
47
- spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=19sHW8zlyG88xZdyqSrp9gFI5oLb-f6THlbhYAtTfmY,3534
48
- spforge/hyperparameter_tuning/_tuner.py,sha256=S70IEmHxl36LaUPl_wc_2mo46qUuH8t0eH0aXuCuGfA,9586
46
+ spforge/hyperparameter_tuning/__init__.py,sha256=N2sKG4SvG41hlsFT2kx_DQYMmXsQr-8031Tu_rxlxyY,1015
47
+ spforge/hyperparameter_tuning/_default_search_spaces.py,sha256=Sm5IrHAW0-vRC8jqCPX0pDi_C-W3L_MoEKGA8bx1Zbc,7546
48
+ spforge/hyperparameter_tuning/_tuner.py,sha256=uovhGqhe8-fdhi79aErUmE2h5NCycFQEIRv5WCjpC7E,16732
49
49
  spforge/performance_transformers/__init__.py,sha256=U6d7_kltbUMLYCGBk4QAFVPJTxXD3etD9qUftV-O3q4,422
50
50
  spforge/performance_transformers/_performance_manager.py,sha256=KwAga6dGhNkXi-MDW6LPjwk6VZwCcjo5L--jnk9aio8,9706
51
51
  spforge/performance_transformers/_performances_transformers.py,sha256=0lxuWjAfWBRXRgQsNJHjw3P-nlTtHBu4_bOVdoy7hq4,15536
52
- spforge/ratings/__init__.py,sha256=jAa_xF2e-96FoyD57EYFKE-mO6OnK23siJOB4tzbyek,387
52
+ spforge/ratings/__init__.py,sha256=OZVH2Lo6END3n1X8qi4QcyAPlThIwAYwVKCiIuOQSQU,576
53
53
  spforge/ratings/_base.py,sha256=dRMkIGj5-2zKddygaEA4g16WCyXon7v8Xa1ymm7IuoM,14335
54
- spforge/ratings/_player_rating.py,sha256=05CuiSa2_uM0xtYpxT00OOxU_TmW4qt6dsXvn7seFss,50861
54
+ spforge/ratings/_player_rating.py,sha256=MyqsyLSY6d7_bxDSnF8eWOyXpSCADWGdepdFSGM4cHw,51365
55
55
  spforge/ratings/_team_rating.py,sha256=T0kFiv3ykYSrVGGsVRa8ZxLB0WMnagxqdFDzl9yZ_9g,24813
56
56
  spforge/ratings/enums.py,sha256=s7z_RcZS6Nlgfa_6tasO8_IABZJwywexe7sep9DJBgo,1739
57
57
  spforge/ratings/league_identifier.py,sha256=_KDUKOwoNU6RNFKE5jju4eYFGVNGBdJsv5mhNvMakfc,6019
58
+ spforge/ratings/league_start_rating_optimizer.py,sha256=Q4Vo3QT-r55qP4aD9WftsTB00UOSRvxM1khlyuAGWNM,8582
58
59
  spforge/ratings/player_performance_predictor.py,sha256=cMxzQuk0nF1MsT_M32g-3mxVdAEbZ-S7TUjEPYdo3Yg,8361
59
60
  spforge/ratings/start_rating_generator.py,sha256=_7hIJ9KRVCwsCoY1GIzY8cuOdHR8RH_BCMeMwQG3E04,6776
60
61
  spforge/ratings/team_performance_predictor.py,sha256=ThQOmYQUqKBB46ONYHOMM2arXFH8AkyKpAZzs80SjHA,7217
61
62
  spforge/ratings/team_start_rating_generator.py,sha256=ZJe84sTvE4Yep3d4wKJMMJn2Q4PhcCwkO7Wyd5nsYUA,5110
62
63
  spforge/ratings/utils.py,sha256=qms5J5SD-FyXDR2G8giDMbu_AoLgI135pjW4nghxROg,3940
63
64
  spforge/scorer/__init__.py,sha256=wj8PCvYIl6742Xwmt86c3oy6iqE8Ss-OpwHud6kd9IY,256
64
- spforge/scorer/_score.py,sha256=pzI-upJU4bwm33J5CGhV8bY8HquudnS--0Z6bhD4xew,54498
65
+ spforge/scorer/_score.py,sha256=TR0T9nJj0aeVgGfOE0fZmXlO66CELulYwxhi7ZAxhvY,56184
65
66
  spforge/transformers/__init__.py,sha256=IPCsMcsgBqG52d0ttATLCY4HvFCQZddExlLt74U-zuI,390
66
67
  spforge/transformers/_base.py,sha256=-smr_McQF9bYxM5-Agx6h7Xv_fhZzPfpAdQV-qK18bs,1134
67
68
  spforge/transformers/_net_over_predicted.py,sha256=5dC8pvA1DNO0yXPSgJSMGU8zAHi-maUELm7FqFQVo-U,2321
@@ -70,15 +71,17 @@ spforge/transformers/_other_transformer.py,sha256=xLfaFIhkFsigAoitB4x3F8An2j9ymd
70
71
  spforge/transformers/_predictor.py,sha256=2sE6gfVrilXzPVcBurSrtqHw33v2ljygQcEYXt9LhZc,3119
71
72
  spforge/transformers/_simple_transformer.py,sha256=zGUFNQYMeoDSa2CoQejQNiNmKCBN5amWTvyOchiUHj0,5660
72
73
  spforge/transformers/_team_ratio_predictor.py,sha256=g8_bR53Yyv0iNCtol1O9bgJSeZcIco_AfbQuUxQJkeY,6884
73
- spforge-0.8.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
+ spforge-0.8.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
74
75
  tests/test_autopipeline.py,sha256=WXHeqBdjQD6xaXVkzvS8ocz0WVP9R7lN0PiHJ2iD8nA,16911
75
76
  tests/test_autopipeline_context.py,sha256=IuRUY4IA6uMObvbl2pXSaXO2_tl3qX6wEbTZY0dkTMI,1240
76
- tests/test_feature_generator_pipeline.py,sha256=CAgBknWqawqYi5_hxcPmpxrLVa5elMHVv1VrSVRKXEA,17705
77
+ tests/test_feature_generator_pipeline.py,sha256=CK0zVL8PfTncy3RmG9i-YpgwjOIV7yJhV7Q44tbetI8,19020
77
78
  tests/cross_validator/test_cross_validator.py,sha256=itCGhNY8-NbDbKbhxHW20wiLuRst7-Rixpmi3FSKQtA,17474
78
79
  tests/distributions/test_distribution.py,sha256=aU8hfCgliM80TES4WGjs9KFXpV8XghBGF7Hu9sqEVSE,10982
80
+ tests/end_to_end/test_estimator_hyperparameter_tuning.py,sha256=fZCJ9rrED2vT68B9ovmVA1cIG2pHRTjy9xzZLxxpEBo,2513
81
+ tests/end_to_end/test_league_start_rating_optimizer.py,sha256=Mmct2ixp4c6L7PGym8wZc7E-Csozryt1g4_o6OCc1uI,3141
79
82
  tests/end_to_end/test_lol_player_kills.py,sha256=RJSYUbPrZ-RzSxGggj03yN0JKYeTB1JghVGYFMYia3Y,11891
80
83
  tests/end_to_end/test_nba_player_points.py,sha256=kyzjo7QIcvpteps29Wix6IS_eJG9d1gHLeWtIHpkWMs,9066
81
- tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=eOsTSVWv16bc0l_nCxH4x8jF-gsmn4Ttfv92mHqSXzc,6303
84
+ tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py,sha256=LXRkI_6Ho2kzJVbNAM17QFhx_MP9WdDJXCO9dWgJGNA,6491
82
85
  tests/end_to_end/test_nba_prediction_consistency.py,sha256=o3DckJasx_I1ed6MhMYZUo2WSDvQ_p3HtJa9DCWTIYU,9857
83
86
  tests/estimator/test_sklearn_estimator.py,sha256=tVfOP9Wx-tV1b6DcHbGxQHZQzNPA0Iobq8jTcUrk59U,48668
84
87
  tests/feature_generator/test_lag.py,sha256=5Ffrv0V9cwkbkzRMPBe3_c_YNW-W2al-XH_acQIvdeg,19531
@@ -87,13 +90,14 @@ tests/feature_generator/test_rolling_against_opponent.py,sha256=20kH1INrWy6DV7AS
87
90
  tests/feature_generator/test_rolling_mean_binary.py,sha256=KuIavJ37Pt8icAb50B23lxdWEPVSHQ7NZHisD1BDpmU,16216
88
91
  tests/feature_generator/test_rolling_mean_days.py,sha256=EyOvdJDnmgPfe13uQBOkwo7fAteBQx-tnyuGM4ng2T8,18884
89
92
  tests/feature_generator/test_rolling_window.py,sha256=YBJo36OK3ILYeXrH06ylXqviUcCaGYaVQaK5RJzwM7Y,23239
93
+ tests/hyperparameter_tuning/test_estimator_tuner.py,sha256=iewME41d6LR2aQ0OtohGFtN_ocJUwTeqvs6L0QDmfG4,4413
90
94
  tests/hyperparameter_tuning/test_rating_tuner.py,sha256=PyCFP3KPc4Iy9E_X9stCVxra14uMgC1tuRwuQ30rO_o,13195
91
95
  tests/performance_transformers/test_performance_manager.py,sha256=bfC5GiBuzHw-mLmKeEzBUUPuKm0ayax2bsF1j88W8L0,10120
92
96
  tests/performance_transformers/test_performances_transformers.py,sha256=A-tGiCx7kXrj1cVj03Bc7prOeZ1_Ryz8YFx9uj3eK6w,11064
93
- tests/ratings/test_player_rating_generator.py,sha256=3mjqlX159QqOlBoY3r_TFkvLwpE4zlLE0fiqpbfk3ps,58547
97
+ tests/ratings/test_player_rating_generator.py,sha256=FGH3Tq0uFoSlkS_XMldsUKhsovBRBvzH9EbqjKvg2O0,59601
94
98
  tests/ratings/test_ratings_property.py,sha256=ckyfGILXa4tfQvsgyXEzBDNr2DUmHwFRV13N60w66iE,6561
95
99
  tests/ratings/test_team_rating_generator.py,sha256=cDnf1zHiYC7pkgydE3MYr8wSTJIq-bPfSqhIRI_4Tic,95357
96
- tests/scorer/test_score.py,sha256=5uVCZyEYsonrfDL5tY9sYSlyXIk3JJy5VPUP7zHpkqY,63163
100
+ tests/scorer/test_score.py,sha256=_Vd6tKpy_1GeOxU7Omxci4CFf7PvRGMefEI0gv2gV6A,74688
97
101
  tests/scorer/test_score_aggregation_granularity.py,sha256=h-hyFOLzwp-92hYVU7CwvlRJ8jhB4DzXCtqgI-zcoqM,13677
98
102
  tests/transformers/test_estimator_transformer_context.py,sha256=5GOHbuWCWBMFwwOTJOuD4oNDsv-qDR0OxNZYGGuMdag,1819
99
103
  tests/transformers/test_net_over_predicted.py,sha256=vh7O1iRRPf4vcW9aLhOMAOyatfM5ZnLsQBKNAYsR3SU,3363
@@ -101,7 +105,7 @@ tests/transformers/test_other_transformer.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
101
105
  tests/transformers/test_predictor_transformer.py,sha256=N1aBYLjN3ldpYZLwjih_gTFYSMitrZu-PNK78W6RHaQ,6877
102
106
  tests/transformers/test_simple_transformer.py,sha256=wWR0qjLb_uS4HXrJgGdiqugOY1X7kwd1_OPS02IT2b8,4676
103
107
  tests/transformers/test_team_ratio_predictor.py,sha256=fOUP_JvNJi-3kom3ZOs1EdG0I6Z8hpLpYKNHu1eWtOw,8562
104
- spforge-0.8.2.dist-info/METADATA,sha256=xcw8LWeJSYUBQ01Owe9FiI8fNmJVrlRRb2lnBcXSOmo,20219
105
- spforge-0.8.2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
106
- spforge-0.8.2.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
107
- spforge-0.8.2.dist-info/RECORD,,
108
+ spforge-0.8.8.dist-info/METADATA,sha256=fO2JHqnnqOrjkWZ1Zh4rgYg58bi4YzxhSa8I72wqDs4,20047
109
+ spforge-0.8.8.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
110
+ spforge-0.8.8.dist-info/top_level.txt,sha256=6UW2M5a7WKOeaAi900qQmRKNj5-HZzE8-eUD9Y9LTq0,23
111
+ spforge-0.8.8.dist-info/RECORD,,
@@ -0,0 +1,85 @@
1
+ import polars as pl
2
+ from sklearn.linear_model import LogisticRegression
3
+ from sklearn.metrics import mean_absolute_error
4
+
5
+ from examples import get_sub_sample_nba_data
6
+ from spforge import AutoPipeline, ColumnNames, EstimatorHyperparameterTuner, ParamSpec
7
+ from spforge.cross_validator import MatchKFoldCrossValidator
8
+ from spforge.scorer import SklearnScorer
9
+
10
+
11
+ def test_nba_estimator_hyperparameter_tuning__workflow_completes():
12
+ df = get_sub_sample_nba_data(as_polars=True, as_pandas=False)
13
+ column_names = ColumnNames(
14
+ team_id="team_id",
15
+ match_id="game_id",
16
+ start_date="start_date",
17
+ player_id="player_id",
18
+ participation_weight="minutes_ratio",
19
+ )
20
+
21
+ df = df.sort(
22
+ [
23
+ column_names.start_date,
24
+ column_names.match_id,
25
+ column_names.team_id,
26
+ column_names.player_id,
27
+ ]
28
+ )
29
+
30
+ df = df.with_columns(
31
+ [
32
+ (pl.col("minutes") / pl.col("minutes").sum().over("game_id")).alias(
33
+ "minutes_ratio"
34
+ ),
35
+ (pl.col("points") > pl.lit(10)).cast(pl.Int64).alias("points_over_10"),
36
+ ]
37
+ )
38
+
39
+ estimator = AutoPipeline(
40
+ estimator=LogisticRegression(max_iter=200),
41
+ estimator_features=["minutes", "minutes_ratio"],
42
+ )
43
+
44
+ cv = MatchKFoldCrossValidator(
45
+ match_id_column_name=column_names.match_id,
46
+ date_column_name=column_names.start_date,
47
+ target_column="points_over_10",
48
+ estimator=estimator,
49
+ prediction_column_name="points_pred",
50
+ n_splits=2,
51
+ features=estimator.required_features,
52
+ )
53
+
54
+ scorer = SklearnScorer(
55
+ scorer_function=mean_absolute_error,
56
+ pred_column="points_pred",
57
+ target="points_over_10",
58
+ validation_column="is_validation",
59
+ )
60
+
61
+ tuner = EstimatorHyperparameterTuner(
62
+ estimator=estimator,
63
+ cross_validator=cv,
64
+ scorer=scorer,
65
+ direction="minimize",
66
+ param_search_space={
67
+ "C": ParamSpec(
68
+ param_type="float",
69
+ low=0.1,
70
+ high=2.0,
71
+ log=True,
72
+ ),
73
+ },
74
+ n_trials=3,
75
+ show_progress_bar=False,
76
+ )
77
+
78
+ result = tuner.optimize(df)
79
+
80
+ assert result.best_params is not None
81
+ assert isinstance(result.best_params, dict)
82
+ assert "estimator__C" in result.best_params
83
+ assert isinstance(result.best_value, float)
84
+ assert result.best_trial is not None
85
+ assert result.study is not None
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+ import pytest
4
+
5
+ from spforge import ColumnNames
6
+ from spforge.ratings import (
7
+ LeagueStartRatingOptimizer,
8
+ PlayerRatingGenerator,
9
+ TeamRatingGenerator,
10
+ )
11
+
12
+
13
+ def _player_df():
14
+ dates = pd.date_range("2024-01-01", periods=3, freq="D")
15
+ rows = []
16
+ for i, date in enumerate(dates):
17
+ mid = f"M{i}"
18
+ for player_idx in range(2):
19
+ rows.append(
20
+ {
21
+ "pid": f"A{player_idx}",
22
+ "tid": "TA",
23
+ "mid": mid,
24
+ "date": date,
25
+ "league": "LCK",
26
+ "perf": 0.4,
27
+ }
28
+ )
29
+ for player_idx in range(2):
30
+ rows.append(
31
+ {
32
+ "pid": f"B{player_idx}",
33
+ "tid": "TB",
34
+ "mid": mid,
35
+ "date": date,
36
+ "league": "LEC",
37
+ "perf": 0.6,
38
+ }
39
+ )
40
+ return pd.DataFrame(rows)
41
+
42
+
43
+ def _team_df():
44
+ dates = pd.date_range("2024-01-01", periods=3, freq="D")
45
+ rows = []
46
+ for i, date in enumerate(dates):
47
+ mid = f"M{i}"
48
+ rows.extend(
49
+ [
50
+ {
51
+ "tid": "TA",
52
+ "mid": mid,
53
+ "date": date,
54
+ "league": "LCK",
55
+ "perf": 0.4,
56
+ },
57
+ {
58
+ "tid": "TB",
59
+ "mid": mid,
60
+ "date": date,
61
+ "league": "LEC",
62
+ "perf": 0.6,
63
+ },
64
+ ]
65
+ )
66
+ return pd.DataFrame(rows)
67
+
68
+
69
+ @pytest.mark.parametrize("use_polars", [False, True])
70
+ def test_league_start_rating_optimizer__adjusts_player_leagues(use_polars):
71
+ cn = ColumnNames(
72
+ player_id="pid",
73
+ team_id="tid",
74
+ match_id="mid",
75
+ start_date="date",
76
+ league="league",
77
+ )
78
+ df = _player_df()
79
+ if use_polars:
80
+ df = pl.from_pandas(df)
81
+ generator = PlayerRatingGenerator(performance_column="perf", column_names=cn)
82
+ optimizer = LeagueStartRatingOptimizer(
83
+ rating_generator=generator,
84
+ n_iterations=1,
85
+ learning_rate=0.5,
86
+ min_cross_region_rows=1,
87
+ )
88
+
89
+ result = optimizer.optimize(df)
90
+
91
+ assert result.league_ratings["LCK"] < 1000
92
+ assert result.league_ratings["LEC"] > 1000
93
+
94
+
95
+ @pytest.mark.parametrize("use_polars", [False, True])
96
+ def test_league_start_rating_optimizer__adjusts_team_leagues(use_polars):
97
+ cn = ColumnNames(
98
+ team_id="tid",
99
+ match_id="mid",
100
+ start_date="date",
101
+ league="league",
102
+ )
103
+ df = _team_df()
104
+ if use_polars:
105
+ df = pl.from_pandas(df)
106
+ generator = TeamRatingGenerator(performance_column="perf", column_names=cn)
107
+ optimizer = LeagueStartRatingOptimizer(
108
+ rating_generator=generator,
109
+ n_iterations=1,
110
+ learning_rate=0.5,
111
+ min_cross_region_rows=1,
112
+ )
113
+
114
+ result = optimizer.optimize(df)
115
+
116
+ assert result.league_ratings["LCK"] < 1000
117
+ assert result.league_ratings["LEC"] > 1000
@@ -97,6 +97,11 @@ def test_nba_player_ratings_hyperparameter_tuning__workflow_completes(
97
97
  "confidence_max_sum",
98
98
  "use_off_def_split",
99
99
  "performance_predictor",
100
+ "start_team_weight",
101
+ "start_league_quantile",
102
+ "start_min_count_for_percentiles",
103
+ "start_min_match_count_team_rating",
104
+ "start_team_rating_subtract",
100
105
  }
101
106
  assert set(result.best_params.keys()) == expected_params
102
107