spforge 0.8.17__tar.gz → 0.8.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spforge might be problematic. Click here for more details.
- {spforge-0.8.17/spforge.egg-info → spforge-0.8.18}/PKG-INFO +1 -1
- {spforge-0.8.17 → spforge-0.8.18}/pyproject.toml +1 -1
- {spforge-0.8.17 → spforge-0.8.18}/spforge/autopipeline.py +11 -1
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_group_by_estimator.py +11 -3
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_other_transformer.py +38 -8
- {spforge-0.8.17 → spforge-0.8.18/spforge.egg-info}/PKG-INFO +1 -1
- {spforge-0.8.17 → spforge-0.8.18}/tests/test_autopipeline.py +143 -7
- {spforge-0.8.17 → spforge-0.8.18}/LICENSE +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/MANIFEST.in +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/README.md +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/game_level_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/lol/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/lol/data/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/lol/data/subsample_lol_data.parquet +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/lol/data/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/lol/pipeline_transformer_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/cross_validation_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/data/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/data/game_player_subsample.parquet +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/data/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/feature_engineering_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/game_winner_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/examples/nba/predictor_transformers_example.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/setup.cfg +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/base_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/cross_validator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/cross_validator/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/cross_validator/cross_validator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/data_structures.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/distributions/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/distributions/_negative_binomial_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/distributions/_normal_distribution_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/distributions/_student_t_distribution_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_conditional_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_frequency_bucketing_classifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_granularity_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_ordinal_classifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/estimator/_sklearn_enhancer_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_lag.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_regressor_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_rolling_against_opponent.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_rolling_mean_binary.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_rolling_mean_days.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_rolling_window.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/feature_generator/_utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/features_generator_pipeline.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/hyperparameter_tuning/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/hyperparameter_tuning/_default_search_spaces.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/hyperparameter_tuning/_tuner.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/performance_transformers/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/performance_transformers/_performance_manager.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/performance_transformers/_performances_transformers.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/_player_rating.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/_team_rating.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/enums.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/league_identifier.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/league_start_rating_optimizer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/player_performance_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/start_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/team_performance_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/team_start_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/ratings/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/scorer/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/scorer/_score.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/__init__.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_base.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_operator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_simple_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/transformers/_team_ratio_predictor.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge/utils.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge.egg-info/SOURCES.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge.egg-info/dependency_links.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge.egg-info/requires.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/spforge.egg-info/top_level.txt +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/cross_validator/test_cross_validator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/distributions/test_distribution.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_estimator_hyperparameter_tuning.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_league_start_rating_optimizer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_lol_player_kills.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_nba_player_points.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_nba_prediction_consistency.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/estimator/test_sklearn_estimator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_lag.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_regressor_feature_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_rolling_against_opponent.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_rolling_mean_binary.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_rolling_mean_days.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_rolling_window.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/hyperparameter_tuning/test_estimator_tuner.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/hyperparameter_tuning/test_rating_tuner.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/performance_transformers/test_performance_manager.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/performance_transformers/test_performances_transformers.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/ratings/test_player_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/ratings/test_player_rating_no_mutation.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/ratings/test_ratings_property.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/ratings/test_team_rating_generator.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/ratings/test_utils_scaled_weights.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/scorer/test_score.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/scorer/test_score_aggregation_granularity.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/test_autopipeline_context.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/test_feature_generator_pipeline.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_estimator_transformer_context.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_net_over_predicted.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_other_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_predictor_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_simple_transformer.py +0 -0
- {spforge-0.8.17 → spforge-0.8.18}/tests/transformers/test_team_ratio_predictor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.17
|
|
3
|
+
Version: 0.8.18
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "spforge"
|
|
7
|
-
version = "0.8.17"
|
|
7
|
+
version = "0.8.18"
|
|
8
8
|
description = "A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -236,6 +236,7 @@ class AutoPipeline(BaseEstimator):
|
|
|
236
236
|
estimator_features: list[str],
|
|
237
237
|
predictor_transformers: list[PredictorTransformer] | None = None,
|
|
238
238
|
granularity: list[str] | None = None,
|
|
239
|
+
aggregation_weight: str | None = None,
|
|
239
240
|
filters: list[Filter] | None = None,
|
|
240
241
|
scale_features: bool = False,
|
|
241
242
|
categorical_handling: CategoricalHandling = "auto",
|
|
@@ -250,6 +251,7 @@ class AutoPipeline(BaseEstimator):
|
|
|
250
251
|
self.estimator_features = estimator_features
|
|
251
252
|
self.feature_names = estimator_features # Internal compat
|
|
252
253
|
self.granularity = granularity or []
|
|
254
|
+
self.aggregation_weight = aggregation_weight
|
|
253
255
|
self.predictor_transformers = predictor_transformers
|
|
254
256
|
self.estimator = estimator
|
|
255
257
|
self.filters = filters or []
|
|
@@ -326,6 +328,10 @@ class AutoPipeline(BaseEstimator):
|
|
|
326
328
|
# Add granularity columns
|
|
327
329
|
context.extend(self.granularity)
|
|
328
330
|
|
|
331
|
+
# Add aggregation weight column
|
|
332
|
+
if self.aggregation_weight:
|
|
333
|
+
context.append(self.aggregation_weight)
|
|
334
|
+
|
|
329
335
|
# Add filter columns
|
|
330
336
|
self._filter_feature_names = []
|
|
331
337
|
for f in self.filters:
|
|
@@ -492,7 +498,11 @@ class AutoPipeline(BaseEstimator):
|
|
|
492
498
|
pre = PreprocessorToDataFrame(pre_raw)
|
|
493
499
|
|
|
494
500
|
est = (
|
|
495
|
-
GroupByEstimator(
|
|
501
|
+
GroupByEstimator(
|
|
502
|
+
self.estimator,
|
|
503
|
+
granularity=[f"{c}" for c in self.granularity],
|
|
504
|
+
aggregation_weight=self.aggregation_weight,
|
|
505
|
+
)
|
|
496
506
|
if do_groupby
|
|
497
507
|
else self.estimator
|
|
498
508
|
)
|
|
@@ -10,10 +10,16 @@ from spforge.transformers._other_transformer import GroupByReducer
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class GroupByEstimator(BaseEstimator):
|
|
13
|
-
def __init__(
|
|
13
|
+
def __init__(
|
|
14
|
+
self,
|
|
15
|
+
estimator: Any,
|
|
16
|
+
granularity: list[str] | None = None,
|
|
17
|
+
aggregation_weight: str | None = None,
|
|
18
|
+
):
|
|
14
19
|
self.estimator = estimator
|
|
15
20
|
self.granularity = granularity or []
|
|
16
|
-
self._reducer = GroupByReducer(self.granularity)
|
|
21
|
+
self.aggregation_weight = aggregation_weight
|
|
22
|
+
self._reducer = GroupByReducer(self.granularity, aggregation_weight=aggregation_weight)
|
|
17
23
|
self._est = None
|
|
18
24
|
|
|
19
25
|
def __sklearn_is_fitted__(self):
|
|
@@ -22,7 +28,9 @@ class GroupByEstimator(BaseEstimator):
|
|
|
22
28
|
@nw.narwhalify
|
|
23
29
|
def fit(self, X: IntoFrameT, y: Any, sample_weight: np.ndarray | None = None):
|
|
24
30
|
X = X.to_pandas()
|
|
25
|
-
|
|
31
|
+
# Backwards compatibility: old pickled objects may not have aggregation_weight
|
|
32
|
+
agg_weight = getattr(self, "aggregation_weight", None)
|
|
33
|
+
self._reducer = GroupByReducer(self.granularity, aggregation_weight=agg_weight)
|
|
26
34
|
X_red = nw.from_native(self._reducer.fit_transform(X))
|
|
27
35
|
y_red, sw_red = self._reducer.reduce_y(X, y, sample_weight=sample_weight)
|
|
28
36
|
|
|
@@ -8,8 +8,9 @@ from sklearn.base import BaseEstimator, TransformerMixin
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
11
|
-
def __init__(self, granularity: list[str]):
|
|
11
|
+
def __init__(self, granularity: list[str], aggregation_weight: str | None = None):
|
|
12
12
|
self.granularity = granularity
|
|
13
|
+
self.aggregation_weight = aggregation_weight
|
|
13
14
|
|
|
14
15
|
@nw.narwhalify
|
|
15
16
|
def fit(self, X: IntoFrameT, y: Any = None):
|
|
@@ -26,18 +27,47 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
26
27
|
raise ValueError("Could not find granularity columns in dataframe %s", self.granularity)
|
|
27
28
|
|
|
28
29
|
non_keys = [c for c in df.columns if c not in keys]
|
|
29
|
-
|
|
30
|
+
schema = df.schema
|
|
31
|
+
num_cols = [c for c in non_keys if schema[c].is_numeric()]
|
|
30
32
|
other_cols = [c for c in non_keys if c not in num_cols]
|
|
31
33
|
|
|
32
34
|
aggs: list[nw.Expr] = []
|
|
33
35
|
|
|
36
|
+
# Backwards compatibility: old pickled objects may not have aggregation_weight
|
|
37
|
+
weight_col = getattr(self, "aggregation_weight", None)
|
|
38
|
+
has_weight = weight_col and weight_col in df.columns
|
|
39
|
+
|
|
34
40
|
for c in num_cols:
|
|
35
|
-
|
|
41
|
+
if c == weight_col:
|
|
42
|
+
aggs.append(nw.col(c).sum().alias(c))
|
|
43
|
+
elif has_weight:
|
|
44
|
+
aggs.append((nw.col(c) * nw.col(weight_col)).sum().alias(f"__{c}_weighted_sum"))
|
|
45
|
+
aggs.append(nw.col(c).mean().alias(f"__{c}_fallback"))
|
|
46
|
+
else:
|
|
47
|
+
aggs.append(nw.col(c).mean().alias(c))
|
|
36
48
|
|
|
37
49
|
for c in other_cols:
|
|
38
50
|
aggs.append(nw.col(c).first().alias(c))
|
|
39
51
|
|
|
52
|
+
if has_weight:
|
|
53
|
+
aggs.append(nw.col(weight_col).sum().alias("__weight_sum"))
|
|
54
|
+
|
|
40
55
|
out = df.group_by(keys).agg(aggs)
|
|
56
|
+
|
|
57
|
+
if has_weight:
|
|
58
|
+
weighted_cols = [c for c in num_cols if c != weight_col]
|
|
59
|
+
for c in weighted_cols:
|
|
60
|
+
out = out.with_columns(
|
|
61
|
+
nw.when((~nw.col("__weight_sum").is_null()) & (nw.col("__weight_sum") != 0))
|
|
62
|
+
.then(nw.col(f"__{c}_weighted_sum") / nw.col("__weight_sum"))
|
|
63
|
+
.otherwise(nw.col(f"__{c}_fallback"))
|
|
64
|
+
.alias(c)
|
|
65
|
+
)
|
|
66
|
+
drop_cols = [f"__{c}_weighted_sum" for c in weighted_cols]
|
|
67
|
+
drop_cols += [f"__{c}_fallback" for c in weighted_cols]
|
|
68
|
+
drop_cols.append("__weight_sum")
|
|
69
|
+
out = out.drop(drop_cols)
|
|
70
|
+
|
|
41
71
|
return out
|
|
42
72
|
|
|
43
73
|
@nw.narwhalify
|
|
@@ -59,12 +89,12 @@ class GroupByReducer(BaseEstimator, TransformerMixin):
|
|
|
59
89
|
if sample_weight is not None:
|
|
60
90
|
df = df.with_columns(nw.lit(sample_weight).alias("__sw"))
|
|
61
91
|
|
|
62
|
-
|
|
92
|
+
y_uniques = df.group_by(keys).agg(nw.col("__y").n_unique().alias("__y_nunique"))
|
|
93
|
+
non_uniform = y_uniques.filter(nw.col("__y_nunique") > 1)
|
|
94
|
+
if len(non_uniform) > 0:
|
|
95
|
+
raise ValueError("Target (y) must be uniform within each granularity group")
|
|
63
96
|
|
|
64
|
-
|
|
65
|
-
agg_exprs = [nw.col("__y").mean().alias("__y")]
|
|
66
|
-
else:
|
|
67
|
-
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
97
|
+
agg_exprs = [nw.col("__y").first().alias("__y")]
|
|
68
98
|
|
|
69
99
|
if sample_weight is not None:
|
|
70
100
|
agg_exprs.append(nw.col("__sw").sum().alias("__sw"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spforge
|
|
3
|
-
Version: 0.8.17
|
|
3
|
+
Version: 0.8.18
|
|
4
4
|
Summary: A flexible framework for generating features, ratings, and building machine learning or other models for training and inference on sports data.
|
|
5
5
|
Author-email: Mathias Holmstrøm <mathiasholmstom@gmail.com>
|
|
6
6
|
License: See LICENSE file
|
|
@@ -328,7 +328,18 @@ def test_infer_categorical_from_feature_names_when_only_numeric_features_given(d
|
|
|
328
328
|
assert any(c.startswith("cat") for c in cap.fit_columns)
|
|
329
329
|
|
|
330
330
|
|
|
331
|
-
def test_granularity_groups_rows_before_estimator_fit_and_predict(df_reg):
|
|
331
|
+
def test_granularity_groups_rows_before_estimator_fit_and_predict(frame):
|
|
332
|
+
df_pd = pd.DataFrame(
|
|
333
|
+
{
|
|
334
|
+
"gameid": ["g1", "g1", "g2", "g2", "g3", "g3"],
|
|
335
|
+
"num1": [1.0, 2.0, np.nan, 4.0, 5.0, 6.0],
|
|
336
|
+
"num2": [10.0, 20.0, 30.0, 40.0, np.nan, 60.0],
|
|
337
|
+
"cat1": ["a", "b", "a", None, "b", "c"],
|
|
338
|
+
"y": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0],
|
|
339
|
+
}
|
|
340
|
+
)
|
|
341
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
342
|
+
|
|
332
343
|
model = AutoPipeline(
|
|
333
344
|
estimator=CaptureEstimator(),
|
|
334
345
|
estimator_features=["gameid", "num1", "num2", "cat1"],
|
|
@@ -339,16 +350,16 @@ def test_granularity_groups_rows_before_estimator_fit_and_predict(df_reg):
|
|
|
339
350
|
remainder="drop",
|
|
340
351
|
)
|
|
341
352
|
|
|
342
|
-
X = _select(
|
|
343
|
-
y = _col(
|
|
353
|
+
X = _select(df, ["gameid", "num1", "num2", "cat1"])
|
|
354
|
+
y = _col(df, "y")
|
|
344
355
|
model.fit(X, y=y)
|
|
345
356
|
|
|
346
357
|
inner = _inner_estimator(model)
|
|
347
358
|
|
|
348
|
-
if isinstance(df_reg, pl.DataFrame):
|
|
349
|
-
n_groups =
|
|
359
|
+
if isinstance(df, pl.DataFrame):
|
|
360
|
+
n_groups = df.select(pl.col("gameid").n_unique()).item()
|
|
350
361
|
else:
|
|
351
|
-
n_groups =
|
|
362
|
+
n_groups = df["gameid"].nunique()
|
|
352
363
|
|
|
353
364
|
assert inner.fit_shape[0] == n_groups
|
|
354
365
|
|
|
@@ -724,9 +735,10 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
|
|
|
724
735
|
"gameid": ["g1", "g1", "g2", "g2"],
|
|
725
736
|
"num1": [1.0, 2.0, 3.0, 4.0],
|
|
726
737
|
"num2": [10.0, 20.0, 30.0, 40.0],
|
|
738
|
+
"y": [1.0, 1.0, 2.0, 2.0],
|
|
727
739
|
}
|
|
728
740
|
)
|
|
729
|
-
y =
|
|
741
|
+
y = df["y"]
|
|
730
742
|
|
|
731
743
|
model = AutoPipeline(
|
|
732
744
|
estimator=RandomForestRegressor(n_estimators=5, random_state=42),
|
|
@@ -745,3 +757,127 @@ def test_feature_importance_names__granularity_uses_deep_feature_names():
|
|
|
745
757
|
assert list(names.keys()) == list(inner.feature_names_in_)
|
|
746
758
|
assert "gameid" not in names
|
|
747
759
|
assert "const_pred" in names
|
|
760
|
+
|
|
761
|
+
|
|
762
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
763
|
+
def test_granularity_with_aggregation_weight__features_weighted(frame):
|
|
764
|
+
df_pd = pd.DataFrame(
|
|
765
|
+
{
|
|
766
|
+
"gameid": ["g1", "g1", "g2", "g2"],
|
|
767
|
+
"num1": [10.0, 30.0, 20.0, 40.0],
|
|
768
|
+
"weight": [0.25, 0.75, 0.5, 0.5],
|
|
769
|
+
"y": [1.0, 1.0, 2.0, 2.0],
|
|
770
|
+
}
|
|
771
|
+
)
|
|
772
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
773
|
+
|
|
774
|
+
cap = CaptureEstimator()
|
|
775
|
+
model = AutoPipeline(
|
|
776
|
+
estimator=cap,
|
|
777
|
+
estimator_features=["num1"],
|
|
778
|
+
granularity=["gameid"],
|
|
779
|
+
aggregation_weight="weight",
|
|
780
|
+
remainder="drop",
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
X = _select(df, ["gameid", "num1", "weight"])
|
|
784
|
+
y = _col(df, "y")
|
|
785
|
+
model.fit(X, y=y)
|
|
786
|
+
|
|
787
|
+
inner = _inner_estimator(model)
|
|
788
|
+
assert inner.fit_shape[0] == 2
|
|
789
|
+
|
|
790
|
+
preds = model.predict(X)
|
|
791
|
+
assert preds.shape[0] == len(X)
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
795
|
+
def test_granularity_aggregation_weight__weighted_mean_correct(frame):
|
|
796
|
+
df_pd = pd.DataFrame(
|
|
797
|
+
{
|
|
798
|
+
"gameid": ["g1", "g1"],
|
|
799
|
+
"num1": [10.0, 30.0],
|
|
800
|
+
"weight": [0.25, 0.75],
|
|
801
|
+
"y": [1.0, 1.0],
|
|
802
|
+
}
|
|
803
|
+
)
|
|
804
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
805
|
+
|
|
806
|
+
from spforge.transformers._other_transformer import GroupByReducer
|
|
807
|
+
|
|
808
|
+
reducer = GroupByReducer(granularity=["gameid"], aggregation_weight="weight")
|
|
809
|
+
transformed = reducer.fit_transform(df)
|
|
810
|
+
|
|
811
|
+
if frame == "pl":
|
|
812
|
+
num1_val = transformed["num1"].to_list()[0]
|
|
813
|
+
else:
|
|
814
|
+
num1_val = transformed["num1"].iloc[0]
|
|
815
|
+
|
|
816
|
+
expected = (10.0 * 0.25 + 30.0 * 0.75) / (0.25 + 0.75)
|
|
817
|
+
assert abs(num1_val - expected) < 1e-6
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
821
|
+
def test_reduce_y_raises_when_target_not_uniform_per_group(frame):
|
|
822
|
+
df_pd = pd.DataFrame(
|
|
823
|
+
{
|
|
824
|
+
"gameid": ["g1", "g1"],
|
|
825
|
+
"num1": [10.0, 30.0],
|
|
826
|
+
}
|
|
827
|
+
)
|
|
828
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
829
|
+
|
|
830
|
+
from spforge.transformers._other_transformer import GroupByReducer
|
|
831
|
+
|
|
832
|
+
reducer = GroupByReducer(granularity=["gameid"])
|
|
833
|
+
|
|
834
|
+
y = np.array([1.0, 2.0])
|
|
835
|
+
with pytest.raises(ValueError, match="Target.*must be uniform"):
|
|
836
|
+
reducer.reduce_y(df, y)
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
840
|
+
def test_reduce_y_works_when_target_uniform_per_group(frame):
|
|
841
|
+
df_pd = pd.DataFrame(
|
|
842
|
+
{
|
|
843
|
+
"gameid": ["g1", "g1", "g2", "g2"],
|
|
844
|
+
"num1": [10.0, 30.0, 20.0, 40.0],
|
|
845
|
+
}
|
|
846
|
+
)
|
|
847
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
848
|
+
|
|
849
|
+
from spforge.transformers._other_transformer import GroupByReducer
|
|
850
|
+
|
|
851
|
+
reducer = GroupByReducer(granularity=["gameid"])
|
|
852
|
+
|
|
853
|
+
y = np.array([1.0, 1.0, 2.0, 2.0])
|
|
854
|
+
y_out, _ = reducer.reduce_y(df, y)
|
|
855
|
+
|
|
856
|
+
assert len(y_out) == 2
|
|
857
|
+
assert set(y_out) == {1.0, 2.0}
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
@pytest.mark.parametrize("frame", ["pd", "pl"])
|
|
861
|
+
def test_aggregation_weight_sums_weight_column(frame):
|
|
862
|
+
df_pd = pd.DataFrame(
|
|
863
|
+
{
|
|
864
|
+
"gameid": ["g1", "g1"],
|
|
865
|
+
"num1": [10.0, 30.0],
|
|
866
|
+
"weight": [0.25, 0.75],
|
|
867
|
+
"y": [1.0, 1.0],
|
|
868
|
+
}
|
|
869
|
+
)
|
|
870
|
+
df = df_pd if frame == "pd" else pl.from_pandas(df_pd)
|
|
871
|
+
|
|
872
|
+
from spforge.transformers._other_transformer import GroupByReducer
|
|
873
|
+
|
|
874
|
+
reducer = GroupByReducer(granularity=["gameid"], aggregation_weight="weight")
|
|
875
|
+
transformed = reducer.fit_transform(df)
|
|
876
|
+
|
|
877
|
+
if frame == "pl":
|
|
878
|
+
weight_val = transformed["weight"].to_list()[0]
|
|
879
|
+
else:
|
|
880
|
+
weight_val = transformed["weight"].iloc[0]
|
|
881
|
+
|
|
882
|
+
expected = 0.25 + 0.75
|
|
883
|
+
assert abs(weight_val - expected) < 1e-6
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/spforge/distributions/_student_t_distribution_estimator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/spforge/performance_transformers/_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/tests/end_to_end/test_nba_player_ratings_hyperparameter_tuning.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/tests/feature_generator/test_regressor_feature_generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/tests/performance_transformers/test_performance_manager.py
RENAMED
|
File without changes
|
{spforge-0.8.17 → spforge-0.8.18}/tests/performance_transformers/test_performances_transformers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|