wavetrainer 0.1.17__tar.gz → 0.1.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.1.17/wavetrainer.egg-info → wavetrainer-0.1.19}/PKG-INFO +1 -1
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/setup.py +1 -1
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/tests/trainer_test.py +2 -1
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/calibrator/vennabers_calibrator.py +14 -5
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/combined_reducer.py +2 -2
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/correlation_reducer.py +1 -3
- {wavetrainer-0.1.17 → wavetrainer-0.1.19/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/LICENSE +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/MANIFEST.in +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/README.md +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/requirements.txt +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/setup.cfg +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/tests/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/tests/model/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/create.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_model.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/lightgbm/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/lightgbm/lightgbm_model.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/params.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/pca_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/trainer.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.1.17',
|
26
|
+
version='0.1.19',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -40,7 +40,7 @@ class TestTrainer(unittest.TestCase):
|
|
40
40
|
|
41
41
|
def test_trainer_dt_column(self):
|
42
42
|
with tempfile.TemporaryDirectory() as tmpdir:
|
43
|
-
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column")
|
43
|
+
trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=5, dt_column="dt_column", allowed_models={"catboost"})
|
44
44
|
x_data = [i for i in range(100)]
|
45
45
|
x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
|
46
46
|
df = pd.DataFrame(
|
@@ -55,6 +55,7 @@ class TestTrainer(unittest.TestCase):
|
|
55
55
|
},
|
56
56
|
index=df.index,
|
57
57
|
)
|
58
|
+
y["y"] = y["y"].astype(bool)
|
58
59
|
trainer.fit(df, y=y)
|
59
60
|
df = trainer.transform(df)
|
60
61
|
print("df:")
|
@@ -5,6 +5,7 @@ import os
|
|
5
5
|
from typing import Self
|
6
6
|
|
7
7
|
import joblib # type: ignore
|
8
|
+
import numpy as np
|
8
9
|
import optuna
|
9
10
|
import pandas as pd
|
10
11
|
from venn_abers import VennAbers # type: ignore
|
@@ -55,18 +56,26 @@ class VennabersCalibrator(Calibrator):
|
|
55
56
|
raise ValueError("vennabers is null")
|
56
57
|
if y is None:
|
57
58
|
raise ValueError("y is null")
|
58
|
-
prob_columns = [
|
59
|
-
x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)
|
60
|
-
]
|
59
|
+
prob_columns = sorted(
|
60
|
+
[x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)]
|
61
|
+
)
|
62
|
+
probs = df[prob_columns].to_numpy()
|
61
63
|
try:
|
62
|
-
vennabers.fit(
|
64
|
+
vennabers.fit(probs, y.to_numpy())
|
63
65
|
except IndexError:
|
64
66
|
logging.error(df)
|
65
67
|
raise
|
66
68
|
return self
|
67
69
|
|
68
70
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
69
|
-
|
71
|
+
prob_columns = sorted(
|
72
|
+
[x for x in df.columns.values if x.startswith(PROBABILITY_COLUMN_PREFIX)]
|
73
|
+
)
|
74
|
+
probs = df[prob_columns].to_numpy()
|
75
|
+
p_prime, _ = self._vennabers.predict_proba(probs)
|
76
|
+
if np.mean(p_prime[:, 1] > 0.5) > 0.5 and np.mean(probs[:, 0] > 0.5) > 0.5:
|
77
|
+
print("⚠️ Warning: calibration seems inverted — flipping p_prime")
|
78
|
+
p_prime = p_prime[:, ::-1]
|
70
79
|
for i in range(p_prime.shape[1]):
|
71
80
|
prob = p_prime[:, i]
|
72
81
|
df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = prob
|
@@ -41,8 +41,8 @@ class CombinedReducer(Reducer):
|
|
41
41
|
UnseenReducer(),
|
42
42
|
NonNumericReducer(),
|
43
43
|
PCAReducer(embedding_cols),
|
44
|
-
ConstantReducer(),
|
45
|
-
DuplicateReducer(),
|
44
|
+
# ConstantReducer(),
|
45
|
+
# DuplicateReducer(),
|
46
46
|
CorrelationReducer(correlation_chunk_size=correlation_chunk_size),
|
47
47
|
SmartCorrelationReducer(),
|
48
48
|
# SelectBySingleFeaturePerformanceReducer(),
|
@@ -8,7 +8,6 @@ from typing import Self
|
|
8
8
|
import numpy as np
|
9
9
|
import optuna
|
10
10
|
import pandas as pd
|
11
|
-
from memory_profiler import profile # type: ignore
|
12
11
|
|
13
12
|
from .non_categorical_numeric_columns import \
|
14
13
|
find_non_categorical_numeric_columns
|
@@ -18,7 +17,6 @@ _CORRELATION_REDUCER_FILENAME = "correlation_reducer.json"
|
|
18
17
|
_CORRELATION_REDUCER_THRESHOLD = "correlation_reducer_threshold"
|
19
18
|
|
20
19
|
|
21
|
-
@profile
|
22
20
|
def _get_correlated_features_to_drop_chunked(
|
23
21
|
df: pd.DataFrame,
|
24
22
|
threshold: float = 0.85,
|
@@ -119,7 +117,7 @@ class CorrelationReducer(Reducer):
|
|
119
117
|
eval_y: pd.Series | pd.DataFrame | None = None,
|
120
118
|
) -> Self:
|
121
119
|
drop_features = _get_correlated_features_to_drop_chunked(
|
122
|
-
df
|
120
|
+
df,
|
123
121
|
threshold=self._threshold,
|
124
122
|
chunk_size=self._correlation_chunk_size,
|
125
123
|
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{wavetrainer-0.1.17 → wavetrainer-0.1.19}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|