spforge 0.8.25__tar.gz → 0.8.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.25/spforge.egg-info → spforge-0.8.26}/PKG-INFO +1 -1
- {spforge-0.8.25 → spforge-0.8.26}/pyproject.toml +1 -1
- {spforge-0.8.25 → spforge-0.8.26}/spforge/scorer/_score.py +114 -50
- {spforge-0.8.25 → spforge-0.8.26/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.25 → spforge-0.8.26}/tests/scorer/test_score_aggregation_granularity.py +69 -0
- {spforge-0.8.25 → spforge-0.8.26}/LICENSE +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/MANIFEST.in +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/README.md +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/game_level_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/lol/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/setup.cfg +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/autopipeline.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/data_structures.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_group_by_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/_player_rating.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/ratings/utils.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_other_transformer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge/utils.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge.egg-info/SOURCES.txt +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/ratings/test_player_rating_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/ratings/test_player_rating_no_mutation.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/ratings/test_utils_scaled_weights.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/test_autopipeline.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.25 → spforge-0.8.26}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.25
|
|
3
|
+
Version: 0.8.26
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.25"
|
|
7
|
+
version = "0.8.26"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -263,6 +263,7 @@ class BaseScorer(ABC):
|
|
|
263
263
|
validation_column: str | None,
|
|
264
264
|
filters: list[Filter] | None = None,
|
|
265
265
|
aggregation_level: list[str] | None = None,
|
|
266
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
266
267
|
granularity: list[str] | None = None,
|
|
267
268
|
compare_to_naive: bool = False,
|
|
268
269
|
naive_granularity: list[str] | None = None,
|
|
@@ -274,6 +275,7 @@ class BaseScorer(ABC):
|
|
|
274
275
|
If set, the scorer will be calculated only once the values of the validation column are equal to 1
|
|
275
276
|
:param filters: The filters to apply before calculating
|
|
276
277
|
:param aggregation_level: The columns to group by before calculating the score (e.g., group from game-player to game-team)
|
|
278
|
+
:param aggregation_method: Aggregation methods for pred/target when aggregation_level is set.
|
|
277
279
|
:param granularity: The columns to calculate separate scores for each unique combination (e.g., different scores for each team)
|
|
278
280
|
"""
|
|
279
281
|
self.target = target
|
|
@@ -289,28 +291,59 @@ class BaseScorer(ABC):
|
|
|
289
291
|
)
|
|
290
292
|
)
|
|
291
293
|
self.aggregation_level = aggregation_level
|
|
294
|
+
self.aggregation_method = aggregation_method
|
|
292
295
|
self.granularity = granularity
|
|
293
296
|
self.compare_to_naive = compare_to_naive
|
|
294
297
|
self.naive_granularity = naive_granularity
|
|
295
298
|
|
|
299
|
+
def _resolve_aggregation_method(self, key: str) -> Any:
|
|
300
|
+
if self.aggregation_method is None:
|
|
301
|
+
return "sum"
|
|
302
|
+
method = self.aggregation_method.get(key)
|
|
303
|
+
if method is None:
|
|
304
|
+
return "sum"
|
|
305
|
+
return method
|
|
306
|
+
|
|
307
|
+
def _build_aggregation_expr(self, df: IntoFrameT, col: str, method: Any) -> Any:
|
|
308
|
+
if isinstance(method, tuple):
|
|
309
|
+
if len(method) != 2 or method[0] != "weighted_mean":
|
|
310
|
+
raise ValueError(f"Unsupported aggregation method for {col}: {method}")
|
|
311
|
+
weight_col = method[1]
|
|
312
|
+
if weight_col not in df.columns:
|
|
313
|
+
raise ValueError(
|
|
314
|
+
f"Aggregation weight column '{weight_col}' not found in dataframe columns."
|
|
315
|
+
)
|
|
316
|
+
weighted_sum = (nw.col(col) * nw.col(weight_col)).sum()
|
|
317
|
+
weight_total = nw.col(weight_col).sum()
|
|
318
|
+
return (weighted_sum / weight_total).alias(col)
|
|
319
|
+
|
|
320
|
+
if method == "sum":
|
|
321
|
+
return nw.col(col).sum().alias(col)
|
|
322
|
+
if method == "mean":
|
|
323
|
+
return nw.col(col).mean().alias(col)
|
|
324
|
+
if method == "first":
|
|
325
|
+
return nw.col(col).first().alias(col)
|
|
326
|
+
raise ValueError(f"Unsupported aggregation method for {col}: {method}")
|
|
327
|
+
|
|
296
328
|
def _apply_aggregation_level(self, df: IntoFrameT) -> IntoFrameT:
|
|
297
329
|
"""Apply aggregation_level grouping if set"""
|
|
298
330
|
if self.aggregation_level:
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
agg_exprs = [
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
# Use sum for aggregation
|
|
306
|
-
agg_exprs.append(nw.col(col).sum().alias(col))
|
|
307
|
-
except Exception:
|
|
308
|
-
# Fallback to mean or first
|
|
309
|
-
agg_exprs.append(nw.col(col).mean().alias(col))
|
|
310
|
-
|
|
331
|
+
pred_method = self._resolve_aggregation_method("pred")
|
|
332
|
+
target_method = self._resolve_aggregation_method("target")
|
|
333
|
+
agg_exprs = [
|
|
334
|
+
self._build_aggregation_expr(df, self.pred_column, pred_method),
|
|
335
|
+
self._build_aggregation_expr(df, self.target, target_method),
|
|
336
|
+
]
|
|
311
337
|
df = df.group_by(self.aggregation_level).agg(agg_exprs)
|
|
312
338
|
return df
|
|
313
339
|
|
|
340
|
+
@narwhals.narwhalify
|
|
341
|
+
def aggregate(self, df: IntoFrameT) -> IntoFrameT:
|
|
342
|
+
df = apply_filters(df, self.filters)
|
|
343
|
+
if not hasattr(df, "to_native"):
|
|
344
|
+
df = nw.from_native(df)
|
|
345
|
+
return self._apply_aggregation_level(df)
|
|
346
|
+
|
|
314
347
|
def _get_granularity_groups(self, df: IntoFrameT) -> list[tuple]:
|
|
315
348
|
"""Get list of granularity tuples from dataframe"""
|
|
316
349
|
if not self.granularity:
|
|
@@ -345,6 +378,7 @@ class PWMSE(BaseScorer):
|
|
|
345
378
|
target: str,
|
|
346
379
|
validation_column: str | None = None,
|
|
347
380
|
aggregation_level: list[str] | None = None,
|
|
381
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
348
382
|
granularity: list[str] | None = None,
|
|
349
383
|
filters: list[Filter] | None = None,
|
|
350
384
|
labels: list[int] | None = None,
|
|
@@ -357,6 +391,7 @@ class PWMSE(BaseScorer):
|
|
|
357
391
|
target=target,
|
|
358
392
|
pred_column=pred_column,
|
|
359
393
|
aggregation_level=aggregation_level,
|
|
394
|
+
aggregation_method=aggregation_method,
|
|
360
395
|
granularity=granularity,
|
|
361
396
|
filters=filters,
|
|
362
397
|
validation_column=validation_column,
|
|
@@ -454,12 +489,7 @@ class PWMSE(BaseScorer):
|
|
|
454
489
|
|
|
455
490
|
pass
|
|
456
491
|
else:
|
|
457
|
-
df = df.group_by(self.aggregation_level).agg(
|
|
458
|
-
[
|
|
459
|
-
nw.col(self.pred_column).mean().alias(self.pred_column),
|
|
460
|
-
nw.col(self.target).mean().alias(self.target),
|
|
461
|
-
]
|
|
462
|
-
)
|
|
492
|
+
df = self._apply_aggregation_level(df)
|
|
463
493
|
|
|
464
494
|
if self.granularity:
|
|
465
495
|
results = {}
|
|
@@ -517,6 +547,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
517
547
|
target: str,
|
|
518
548
|
validation_column: str | None = None,
|
|
519
549
|
aggregation_level: list[str] | None = None,
|
|
550
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
520
551
|
granularity: list[str] | None = None,
|
|
521
552
|
filters: list[Filter] | None = None,
|
|
522
553
|
labels: list[int] | None = None,
|
|
@@ -540,6 +571,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
540
571
|
target=target,
|
|
541
572
|
pred_column=pred_column,
|
|
542
573
|
aggregation_level=aggregation_level,
|
|
574
|
+
aggregation_method=aggregation_method,
|
|
543
575
|
granularity=granularity,
|
|
544
576
|
filters=filters,
|
|
545
577
|
validation_column=validation_column,
|
|
@@ -582,12 +614,7 @@ class MeanBiasScorer(BaseScorer):
|
|
|
582
614
|
|
|
583
615
|
# Apply aggregation_level if set
|
|
584
616
|
if self.aggregation_level:
|
|
585
|
-
df = df.group_by(self.aggregation_level).agg(
|
|
586
|
-
[
|
|
587
|
-
nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
|
|
588
|
-
nw.col(self.target).sum().alias(self.target),
|
|
589
|
-
]
|
|
590
|
-
)
|
|
617
|
+
df = self._apply_aggregation_level(df)
|
|
591
618
|
# After group_by, ensure df is still a Narwhals DataFrame
|
|
592
619
|
if not hasattr(df, "to_native"):
|
|
593
620
|
df = nw.from_native(df)
|
|
@@ -658,6 +685,7 @@ class SklearnScorer(BaseScorer):
|
|
|
658
685
|
target: str,
|
|
659
686
|
validation_column: str | None = None,
|
|
660
687
|
aggregation_level: list[str] | None = None,
|
|
688
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
661
689
|
granularity: list[str] | None = None,
|
|
662
690
|
filters: list[Filter] | None = None,
|
|
663
691
|
params: dict[str, Any] = None,
|
|
@@ -679,6 +707,7 @@ class SklearnScorer(BaseScorer):
|
|
|
679
707
|
target=target,
|
|
680
708
|
pred_column=pred_column,
|
|
681
709
|
aggregation_level=aggregation_level,
|
|
710
|
+
aggregation_method=aggregation_method,
|
|
682
711
|
granularity=granularity,
|
|
683
712
|
filters=filters,
|
|
684
713
|
validation_column=validation_column,
|
|
@@ -756,12 +785,7 @@ class SklearnScorer(BaseScorer):
|
|
|
756
785
|
)
|
|
757
786
|
|
|
758
787
|
if self.aggregation_level:
|
|
759
|
-
df = df.group_by(self.aggregation_level).agg(
|
|
760
|
-
[
|
|
761
|
-
nw.col(self.pred_column_name).sum().alias(self.pred_column_name),
|
|
762
|
-
nw.col(self.target).sum().alias(self.target),
|
|
763
|
-
]
|
|
764
|
-
)
|
|
788
|
+
df = self._apply_aggregation_level(df)
|
|
765
789
|
if not hasattr(df, "to_native"):
|
|
766
790
|
df = nw.from_native(df)
|
|
767
791
|
|
|
@@ -798,6 +822,7 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
798
822
|
class_column_name: str = "classes",
|
|
799
823
|
validation_column: str | None = None,
|
|
800
824
|
aggregation_level: list[str] | None = None,
|
|
825
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
801
826
|
granularity: list[str] | None = None,
|
|
802
827
|
filters: list[Filter] | None = None,
|
|
803
828
|
compare_to_naive: bool = False,
|
|
@@ -810,6 +835,7 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
810
835
|
target=target,
|
|
811
836
|
pred_column=pred_column,
|
|
812
837
|
aggregation_level=aggregation_level,
|
|
838
|
+
aggregation_method=aggregation_method,
|
|
813
839
|
granularity=granularity,
|
|
814
840
|
filters=filters,
|
|
815
841
|
validation_column=validation_column,
|
|
@@ -817,6 +843,49 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
817
843
|
naive_granularity=naive_granularity,
|
|
818
844
|
)
|
|
819
845
|
|
|
846
|
+
def _aggregate_pandas_series(
|
|
847
|
+
self, df: pd.DataFrame, col: str, method: Any
|
|
848
|
+
) -> pd.Series:
|
|
849
|
+
grouped = df.groupby(self.aggregation_level, dropna=False)
|
|
850
|
+
if isinstance(method, tuple):
|
|
851
|
+
if len(method) != 2 or method[0] != "weighted_mean":
|
|
852
|
+
raise ValueError(f"Unsupported aggregation method for {col}: {method}")
|
|
853
|
+
weight_col = method[1]
|
|
854
|
+
if weight_col not in df.columns:
|
|
855
|
+
raise ValueError(
|
|
856
|
+
f"Aggregation weight column '{weight_col}' not found in dataframe columns."
|
|
857
|
+
)
|
|
858
|
+
return grouped.apply(
|
|
859
|
+
lambda g: (g[col] * g[weight_col]).sum() / g[weight_col].sum()
|
|
860
|
+
)
|
|
861
|
+
|
|
862
|
+
if method == "sum":
|
|
863
|
+
return grouped[col].sum()
|
|
864
|
+
if method == "mean":
|
|
865
|
+
return grouped[col].mean()
|
|
866
|
+
if method == "first":
|
|
867
|
+
return grouped[col].first()
|
|
868
|
+
raise ValueError(f"Unsupported aggregation method for {col}: {method}")
|
|
869
|
+
|
|
870
|
+
def _aggregate_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
871
|
+
if not self.aggregation_level:
|
|
872
|
+
return df
|
|
873
|
+
pred_method = self._resolve_aggregation_method("pred")
|
|
874
|
+
target_method = self._resolve_aggregation_method("target")
|
|
875
|
+
agg_df = pd.DataFrame(
|
|
876
|
+
{
|
|
877
|
+
self.pred_column: self._aggregate_pandas_series(
|
|
878
|
+
df, self.pred_column, pred_method
|
|
879
|
+
),
|
|
880
|
+
self.target: self._aggregate_pandas_series(df, self.target, target_method),
|
|
881
|
+
self.class_column_name: df.groupby(self.aggregation_level, dropna=False)[
|
|
882
|
+
self.class_column_name
|
|
883
|
+
].first(),
|
|
884
|
+
}
|
|
885
|
+
)
|
|
886
|
+
agg_df.reset_index(inplace=True)
|
|
887
|
+
return agg_df
|
|
888
|
+
|
|
820
889
|
def _calculate_score_for_group(self, df: pd.DataFrame) -> float:
|
|
821
890
|
"""Calculate score for a single group (used for granularity)"""
|
|
822
891
|
df = df.copy()
|
|
@@ -948,13 +1017,7 @@ class ProbabilisticMeanBias(BaseScorer):
|
|
|
948
1017
|
|
|
949
1018
|
# Apply aggregation_level if set
|
|
950
1019
|
if self.aggregation_level:
|
|
951
|
-
df = (
|
|
952
|
-
df.groupby(self.aggregation_level)
|
|
953
|
-
.agg(
|
|
954
|
-
{self.pred_column: "mean", self.target: "mean", self.class_column_name: "first"}
|
|
955
|
-
)
|
|
956
|
-
.reset_index()
|
|
957
|
-
)
|
|
1020
|
+
df = self._aggregate_pandas(df)
|
|
958
1021
|
|
|
959
1022
|
# If granularity is set, calculate separate scores per group
|
|
960
1023
|
if self.granularity:
|
|
@@ -995,6 +1058,7 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
995
1058
|
classes: list[int],
|
|
996
1059
|
validation_column: str | None = None,
|
|
997
1060
|
aggregation_level: list[str] | None = None,
|
|
1061
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
998
1062
|
granularity: list[str] | None = None,
|
|
999
1063
|
filters: list[Filter] | None = None,
|
|
1000
1064
|
labels: list[int] | None = None,
|
|
@@ -1006,6 +1070,7 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
1006
1070
|
target=target,
|
|
1007
1071
|
pred_column=pred_column,
|
|
1008
1072
|
aggregation_level=aggregation_level,
|
|
1073
|
+
aggregation_method=aggregation_method,
|
|
1009
1074
|
granularity=granularity,
|
|
1010
1075
|
filters=filters,
|
|
1011
1076
|
validation_column=validation_column,
|
|
@@ -1102,14 +1167,10 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
1102
1167
|
if not hasattr(df, "to_native"):
|
|
1103
1168
|
df = nw.from_native(df)
|
|
1104
1169
|
|
|
1105
|
-
df_native = df.to_native()
|
|
1106
|
-
df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
|
|
1107
|
-
|
|
1108
1170
|
# Filter out null and NaN targets
|
|
1109
|
-
before = len(df_pl)
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
after = len(df_pl)
|
|
1171
|
+
before = len(df)
|
|
1172
|
+
df = _filter_nulls_and_nans(df, self.target)
|
|
1173
|
+
after = len(df)
|
|
1113
1174
|
if before != after:
|
|
1114
1175
|
_logger.info(
|
|
1115
1176
|
"OrdinalLossScorer: Dropped %d rows with NaN target (%d → %d)",
|
|
@@ -1119,12 +1180,12 @@ class OrdinalLossScorer(BaseScorer):
|
|
|
1119
1180
|
)
|
|
1120
1181
|
|
|
1121
1182
|
if self.aggregation_level:
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1183
|
+
df = self._apply_aggregation_level(df)
|
|
1184
|
+
|
|
1185
|
+
df_native = df.to_native()
|
|
1186
|
+
df_pl = pl.DataFrame(df_native) if isinstance(df_native, pd.DataFrame) else df_native
|
|
1187
|
+
if df_pl.is_empty():
|
|
1188
|
+
return {} if self.granularity else 0.0
|
|
1128
1189
|
|
|
1129
1190
|
if self.granularity:
|
|
1130
1191
|
results = {}
|
|
@@ -1197,6 +1258,7 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1197
1258
|
threshold_rounding: str = "ceil",
|
|
1198
1259
|
validation_column: str | None = None,
|
|
1199
1260
|
aggregation_level: list[str] | None = None,
|
|
1261
|
+
aggregation_method: dict[str, Any] | None = None,
|
|
1200
1262
|
granularity: list[str] | None = None,
|
|
1201
1263
|
filters: list["Filter"] | None = None,
|
|
1202
1264
|
compare_to_naive: bool = False,
|
|
@@ -1207,6 +1269,7 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1207
1269
|
target=self._EVENT_COL,
|
|
1208
1270
|
pred_column=dist_column,
|
|
1209
1271
|
aggregation_level=aggregation_level,
|
|
1272
|
+
aggregation_method=aggregation_method,
|
|
1210
1273
|
granularity=granularity,
|
|
1211
1274
|
filters=filters,
|
|
1212
1275
|
validation_column=validation_column,
|
|
@@ -1227,6 +1290,7 @@ class ThresholdEventScorer(BaseScorer):
|
|
|
1227
1290
|
target=self._EVENT_COL,
|
|
1228
1291
|
pred_column=self._P_EVENT_COL,
|
|
1229
1292
|
aggregation_level=aggregation_level,
|
|
1293
|
+
aggregation_method=aggregation_method,
|
|
1230
1294
|
granularity=granularity,
|
|
1231
1295
|
filters=None,
|
|
1232
1296
|
validation_column=validation_column,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.25
|
|
3
|
+
Version: 0.8.26
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -21,6 +21,16 @@ def create_dataframe(df_type, data: dict):
|
|
|
21
21
|
return df_type(data)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def to_pandas_df(df):
|
|
25
|
+
if hasattr(df, "to_native"):
|
|
26
|
+
df = df.to_native()
|
|
27
|
+
if isinstance(df, pd.DataFrame):
|
|
28
|
+
return df
|
|
29
|
+
if isinstance(df, pl.DataFrame):
|
|
30
|
+
return df.to_pandas()
|
|
31
|
+
raise TypeError(f"Unsupported dataframe type: {type(df)}")
|
|
32
|
+
|
|
33
|
+
|
|
24
34
|
# ============================================================================
|
|
25
35
|
# Aggregation Level Tests
|
|
26
36
|
# ============================================================================
|
|
@@ -104,6 +114,65 @@ def test_pwmse_aggregation_level(df_type):
|
|
|
104
114
|
assert score >= 0
|
|
105
115
|
|
|
106
116
|
|
|
117
|
+
@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
|
|
118
|
+
def test_aggregate_returns_grouped_frame(df_type):
|
|
119
|
+
"""aggregate returns filtered, grouped dataframe with default sum behavior"""
|
|
120
|
+
df = create_dataframe(
|
|
121
|
+
df_type,
|
|
122
|
+
{
|
|
123
|
+
"game_id": [1, 1, 1, 1],
|
|
124
|
+
"player_id": [1, 2, 3, 4],
|
|
125
|
+
"team_id": [1, 1, 2, 2],
|
|
126
|
+
"pred": [0.5, 0.6, 0.3, 0.4],
|
|
127
|
+
"target": [0, 1, 0, 1],
|
|
128
|
+
},
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
scorer = MeanBiasScorer(
|
|
132
|
+
pred_column="pred", target="target", aggregation_level=["game_id", "team_id"]
|
|
133
|
+
)
|
|
134
|
+
aggregated = to_pandas_df(scorer.aggregate(df))
|
|
135
|
+
|
|
136
|
+
assert len(aggregated) == 2
|
|
137
|
+
team1 = aggregated[aggregated["team_id"] == 1].iloc[0]
|
|
138
|
+
team2 = aggregated[aggregated["team_id"] == 2].iloc[0]
|
|
139
|
+
assert abs(team1["pred"] - 1.1) < 1e-10
|
|
140
|
+
assert abs(team1["target"] - 1.0) < 1e-10
|
|
141
|
+
assert abs(team2["pred"] - 0.7) < 1e-10
|
|
142
|
+
assert abs(team2["target"] - 1.0) < 1e-10
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@pytest.mark.parametrize("df_type", [pl.DataFrame, pd.DataFrame])
|
|
146
|
+
def test_weighted_mean_aggregation_method(df_type):
|
|
147
|
+
"""Weighted mean aggregation uses provided weight column"""
|
|
148
|
+
df = create_dataframe(
|
|
149
|
+
df_type,
|
|
150
|
+
{
|
|
151
|
+
"game_id": [1, 1, 1],
|
|
152
|
+
"team_id": [1, 1, 1],
|
|
153
|
+
"pred": [0.4, 0.6, 0.9],
|
|
154
|
+
"target": [0.5, 0.7, 0.2],
|
|
155
|
+
"attempts": [10, 20, 30],
|
|
156
|
+
},
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
scorer = MeanBiasScorer(
|
|
160
|
+
pred_column="pred",
|
|
161
|
+
target="target",
|
|
162
|
+
aggregation_level=["game_id", "team_id"],
|
|
163
|
+
aggregation_method={
|
|
164
|
+
"pred": ("weighted_mean", "attempts"),
|
|
165
|
+
"target": ("weighted_mean", "attempts"),
|
|
166
|
+
},
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
score = scorer.score(df)
|
|
170
|
+
expected_pred = (0.4 * 10 + 0.6 * 20 + 0.9 * 30) / 60
|
|
171
|
+
expected_target = (0.5 * 10 + 0.7 * 20 + 0.2 * 30) / 60
|
|
172
|
+
expected = expected_pred - expected_target
|
|
173
|
+
assert abs(score - expected) < 1e-10
|
|
174
|
+
|
|
175
|
+
|
|
107
176
|
# ============================================================================
|
|
108
177
|
# Granularity Tests (Separate Scores Per Group)
|
|
109
178
|
# ============================================================================
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/spforge/distributions/_student_t_distribution_estimator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/spforge/performance_transformers/_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/tests/feature_generator/test_regressor_feature_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/tests/performance_transformers/test_performance_manager.py
RENAMED
|
File without changes
|
{spforge-0.8.25 → spforge-0.8.26}/tests/performance_transformers/test_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|