wavetrainer 0.0.6__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.6/wavetrainer.egg-info → wavetrainer-0.0.8}/PKG-INFO +1 -1
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/setup.py +1 -1
- wavetrainer-0.0.8/tests/model/catboost_kwargs_test.py +28 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/tests/trainer_test.py +2 -1
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/mapie_calibrator.py +1 -1
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_classifier_wrap.py +1 -1
- wavetrainer-0.0.8/wavetrainer/model/catboost_kwargs.py +50 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_model.py +9 -4
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_regressor_wrap.py +1 -1
- wavetrainer-0.0.8/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/SOURCES.txt +2 -0
- wavetrainer-0.0.6/wavetrainer/model/catboost_kwargs.py +0 -35
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/LICENSE +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/MANIFEST.in +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/README.md +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/requirements.txt +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/setup.cfg +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/tests/__init__.py +0 -0
- {wavetrainer-0.0.6/wavetrainer/weights → wavetrainer-0.0.8/tests/model}/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/trainer.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.6',
|
26
|
+
version='0.0.8',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -0,0 +1,28 @@
|
|
1
|
+
"""Tests for the catboost kwargs handler class."""
|
2
|
+
import unittest
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
|
6
|
+
from wavetrainer.model.catboost_kwargs import handle_fit_kwargs
|
7
|
+
|
8
|
+
|
9
|
+
class TestCatboostKwargs(unittest.TestCase):
|
10
|
+
|
11
|
+
def test_handle_fit_kwargs(self):
|
12
|
+
x_train = pd.DataFrame(data={
|
13
|
+
"thing": [0.0, 1.0, 2.0, 3.0, 4.0],
|
14
|
+
})
|
15
|
+
x_train["thing"] = x_train["thing"].astype('category')
|
16
|
+
y_train = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
|
17
|
+
x_test = pd.DataFrame(data={
|
18
|
+
"thing": [0.0, 1.0, 2.0, 3.0, 4.0],
|
19
|
+
})
|
20
|
+
x_test["thing"] = x_test["thing"].astype('category')
|
21
|
+
y_test = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
|
22
|
+
args, _ = handle_fit_kwargs(
|
23
|
+
x_train,
|
24
|
+
y_train,
|
25
|
+
eval_set=(x_test, y_test),
|
26
|
+
cat_features=x_train.select_dtypes(include="category").columns.tolist(),
|
27
|
+
)
|
28
|
+
assert len(args) == 2
|
@@ -20,10 +20,11 @@ class TestTrainer(unittest.TestCase):
|
|
20
20
|
data={
|
21
21
|
"column1": x_data,
|
22
22
|
"column2": [(x * random.random()) + random.random() for x in x_data],
|
23
|
-
"column3": [(x / random.random()) - random.random() for x in x_data],
|
23
|
+
"column3": [int(((x / random.random()) - random.random()) * 1000.0) for x in x_data],
|
24
24
|
},
|
25
25
|
index=x_index,
|
26
26
|
)
|
27
|
+
df["column3"] = df["column3"].astype('category')
|
27
28
|
y = pd.DataFrame(
|
28
29
|
data={
|
29
30
|
"y": [x % 2 == 0 for x in x_data],
|
@@ -60,7 +60,7 @@ class MAPIECalibrator(Calibrator):
|
|
60
60
|
try:
|
61
61
|
alpha = []
|
62
62
|
for potential_alpha in [0.05, 0.32]:
|
63
|
-
if len(df) > int(1.0 / potential_alpha):
|
63
|
+
if len(df) > int(1.0 / potential_alpha) + 1:
|
64
64
|
alpha.append(potential_alpha)
|
65
65
|
if alpha:
|
66
66
|
_, y_pis = self._mapie.predict(df, alpha=alpha)
|
@@ -11,5 +11,5 @@ class CatBoostClassifierWrapper(CatBoostClassifier):
|
|
11
11
|
"""A wrapper for the catboost classifier."""
|
12
12
|
|
13
13
|
def fit(self, *args, **kwargs):
|
14
|
-
kwargs = handle_fit_kwargs(*args, **kwargs)
|
14
|
+
args, kwargs = handle_fit_kwargs(*args, **kwargs)
|
15
15
|
return super().fit(*args, **kwargs)
|
@@ -0,0 +1,50 @@
|
|
1
|
+
"""A list of constant catboost kwargs."""
|
2
|
+
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
import numpy as np
|
6
|
+
from catboost import Pool # type: ignore
|
7
|
+
|
8
|
+
ORIGINAL_X_ARG_KEY = "original_x"
|
9
|
+
EVAL_SET_ARG_KEY = "eval_set"
|
10
|
+
CAT_FEATURES_ARG_KEY = "cat_features"
|
11
|
+
|
12
|
+
|
13
|
+
def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
|
14
|
+
"""Handles keyword args coming into a catboost fit method."""
|
15
|
+
if ORIGINAL_X_ARG_KEY in kwargs:
|
16
|
+
df = kwargs[ORIGINAL_X_ARG_KEY]
|
17
|
+
eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
|
18
|
+
cat_features = kwargs[CAT_FEATURES_ARG_KEY]
|
19
|
+
args_list = list(args)
|
20
|
+
fit_x = args_list[0]
|
21
|
+
fix_x_cp = fit_x.copy()
|
22
|
+
|
23
|
+
# Stupid code to ensure eval is feature equivalent to train data
|
24
|
+
included_columns = []
|
25
|
+
for i in range(fix_x_cp.shape[1]):
|
26
|
+
arr_col_values = fix_x_cp[:, i]
|
27
|
+
for col in df.columns:
|
28
|
+
if col in included_columns:
|
29
|
+
continue
|
30
|
+
df_col_values = df[col].values
|
31
|
+
if np.allclose(df_col_values, arr_col_values, equal_nan=True):
|
32
|
+
included_columns.append(col)
|
33
|
+
break
|
34
|
+
# We also need to update cat_features or catboost will yell at us
|
35
|
+
cat_features = list(
|
36
|
+
set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
|
37
|
+
)
|
38
|
+
args_list[0] = df[included_columns]
|
39
|
+
args = tuple(args_list)
|
40
|
+
|
41
|
+
eval_x = eval_x[included_columns]
|
42
|
+
kwargs[EVAL_SET_ARG_KEY] = Pool(
|
43
|
+
eval_x,
|
44
|
+
label=eval_y,
|
45
|
+
cat_features=cat_features,
|
46
|
+
)
|
47
|
+
kwargs[CAT_FEATURES_ARG_KEY] = cat_features
|
48
|
+
|
49
|
+
del kwargs[ORIGINAL_X_ARG_KEY]
|
50
|
+
return args, kwargs
|
@@ -10,7 +10,8 @@ from catboost import CatBoost, Pool # type: ignore
|
|
10
10
|
|
11
11
|
from ..model_type import ModelType, determine_model_type
|
12
12
|
from .catboost_classifier_wrap import CatBoostClassifierWrapper
|
13
|
-
from .catboost_kwargs import EVAL_SET, ORIGINAL_X
|
13
|
+
from .catboost_kwargs import (CAT_FEATURES_ARG_KEY, EVAL_SET_ARG_KEY,
|
14
|
+
ORIGINAL_X_ARG_KEY)
|
14
15
|
from .catboost_regressor_wrap import CatBoostRegressorWrapper
|
15
16
|
from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
|
16
17
|
|
@@ -66,9 +67,9 @@ class CatboostModel(Model):
|
|
66
67
|
raise ValueError("y is null.")
|
67
68
|
self._model_type = determine_model_type(y)
|
68
69
|
return {
|
69
|
-
|
70
|
-
|
71
|
-
|
70
|
+
EVAL_SET_ARG_KEY: (eval_x, eval_y),
|
71
|
+
CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
|
72
|
+
ORIGINAL_X_ARG_KEY: df,
|
72
73
|
}
|
73
74
|
|
74
75
|
def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
|
@@ -122,6 +123,8 @@ class CatboostModel(Model):
|
|
122
123
|
) -> Self:
|
123
124
|
if y is None:
|
124
125
|
raise ValueError("y is null.")
|
126
|
+
if eval_x is None:
|
127
|
+
raise ValueError("eval_x is null.")
|
125
128
|
self._model_type = determine_model_type(y)
|
126
129
|
catboost = self._provide_catboost()
|
127
130
|
|
@@ -129,10 +132,12 @@ class CatboostModel(Model):
|
|
129
132
|
df,
|
130
133
|
label=y,
|
131
134
|
weight=w,
|
135
|
+
cat_features=df.select_dtypes(include="category").columns.tolist(),
|
132
136
|
)
|
133
137
|
eval_pool = Pool(
|
134
138
|
eval_x,
|
135
139
|
label=eval_y,
|
140
|
+
cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
|
136
141
|
)
|
137
142
|
catboost.fit(
|
138
143
|
train_pool,
|
@@ -9,5 +9,5 @@ class CatBoostRegressorWrapper(CatBoostRegressor):
|
|
9
9
|
"""A wrapper for the catboost regressor."""
|
10
10
|
|
11
11
|
def fit(self, *args, **kwargs):
|
12
|
-
kwargs = handle_fit_kwargs(*args, **kwargs)
|
12
|
+
args, kwargs = handle_fit_kwargs(*args, **kwargs)
|
13
13
|
return super().fit(*args, **kwargs)
|
File without changes
|
@@ -1,35 +0,0 @@
|
|
1
|
-
"""A list of constant catboost kwargs."""
|
2
|
-
|
3
|
-
from typing import Any
|
4
|
-
|
5
|
-
import numpy as np
|
6
|
-
from catboost import Pool # type: ignore
|
7
|
-
|
8
|
-
ORIGINAL_X = "original_x"
|
9
|
-
EVAL_SET = "eval_set"
|
10
|
-
|
11
|
-
|
12
|
-
def handle_fit_kwargs(*args, **kwargs) -> dict[str, Any]:
|
13
|
-
"""Handles keyword args coming into a catboost fit method."""
|
14
|
-
if ORIGINAL_X in kwargs:
|
15
|
-
df = kwargs[ORIGINAL_X]
|
16
|
-
eval_x, eval_y = kwargs[EVAL_SET]
|
17
|
-
fit_x = args[0]
|
18
|
-
fix_x_cp = fit_x.copy()
|
19
|
-
|
20
|
-
# Stupid code to ensure eval is feature equivalent to train data
|
21
|
-
included_columns = []
|
22
|
-
for i in range(fix_x_cp.shape[1]):
|
23
|
-
arr_col_values = fix_x_cp[:, i]
|
24
|
-
for col in df.columns:
|
25
|
-
df_col_values = df[col].values
|
26
|
-
if np.allclose(df_col_values, arr_col_values, equal_nan=True):
|
27
|
-
included_columns.append(col)
|
28
|
-
df = df.drop(col, axis=1)
|
29
|
-
break
|
30
|
-
|
31
|
-
eval_x = eval_x[included_columns]
|
32
|
-
kwargs[EVAL_SET] = Pool(eval_x, label=eval_y)
|
33
|
-
|
34
|
-
del kwargs[ORIGINAL_X]
|
35
|
-
return kwargs
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|