wavetrainer 0.0.29__tar.gz → 0.0.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.29/wavetrainer.egg-info → wavetrainer-0.0.31}/PKG-INFO +1 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/setup.py +1 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/trainer_test.py +1 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_model.py +11 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/model.py +7 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/model_router.py +7 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/tabpfn_model.py +4 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/combined_reducer.py +9 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/trainer.py +43 -9
- {wavetrainer-0.0.29 → wavetrainer-0.0.31/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/LICENSE +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/MANIFEST.in +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/README.md +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/requirements.txt +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/setup.cfg +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.29',
|
26
|
+
version='0.0.31',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -72,6 +72,16 @@ class CatboostModel(Model):
|
|
72
72
|
def supports_importances(self) -> bool:
|
73
73
|
return True
|
74
74
|
|
75
|
+
@property
|
76
|
+
def feature_importances(self) -> dict[str, float]:
|
77
|
+
catboost = self._provide_catboost()
|
78
|
+
importances = catboost.get_feature_importance(prettified=True)
|
79
|
+
if importances is None:
|
80
|
+
raise ValueError("importances is null")
|
81
|
+
feature_ids = importances["Feature Id"].to_list() # type: ignore
|
82
|
+
importances = importances["Importances"].to_list() # type: ignore
|
83
|
+
return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
|
84
|
+
|
75
85
|
def pre_fit(
|
76
86
|
self,
|
77
87
|
df: pd.DataFrame,
|
@@ -165,7 +175,7 @@ class CatboostModel(Model):
|
|
165
175
|
label=eval_y,
|
166
176
|
cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
|
167
177
|
)
|
168
|
-
if eval_x is not None
|
178
|
+
if eval_x is not None and self._best_iteration is not None
|
169
179
|
else None
|
170
180
|
)
|
171
181
|
catboost.fit(
|
@@ -37,6 +37,13 @@ class Model(Params, Fit):
|
|
37
37
|
"supports_importances not implemented in parent class."
|
38
38
|
)
|
39
39
|
|
40
|
+
@property
|
41
|
+
def feature_importances(self) -> dict[str, float]:
|
42
|
+
"""The feature importances of this model."""
|
43
|
+
raise NotImplementedError(
|
44
|
+
"feature_importances not implemented in parent class."
|
45
|
+
)
|
46
|
+
|
40
47
|
def pre_fit(
|
41
48
|
self,
|
42
49
|
df: pd.DataFrame,
|
@@ -52,6 +52,13 @@ class ModelRouter(Model):
|
|
52
52
|
raise ValueError("model is null")
|
53
53
|
return model.supports_importances
|
54
54
|
|
55
|
+
@property
|
56
|
+
def feature_importances(self) -> dict[str, float]:
|
57
|
+
model = self._model
|
58
|
+
if model is None:
|
59
|
+
raise ValueError("model is null")
|
60
|
+
return model.feature_importances
|
61
|
+
|
55
62
|
def pre_fit(
|
56
63
|
self,
|
57
64
|
df: pd.DataFrame,
|
@@ -90,7 +90,16 @@ class CombinedReducer(Reducer):
|
|
90
90
|
eval_y: pd.Series | pd.DataFrame | None = None,
|
91
91
|
) -> Self:
|
92
92
|
for reducer in self._reducers:
|
93
|
+
before_columns = set(df.columns.values)
|
93
94
|
df = reducer.fit_transform(df)
|
95
|
+
after_columns = set(df.columns.values)
|
96
|
+
removed_columns = before_columns.difference(after_columns)
|
97
|
+
if removed_columns:
|
98
|
+
logging.info(
|
99
|
+
"Removed columns %s using %s",
|
100
|
+
",".join(removed_columns),
|
101
|
+
reducer.name(),
|
102
|
+
)
|
94
103
|
return self
|
95
104
|
|
96
105
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
@@ -37,6 +37,14 @@ _TEST_SIZE_KEY = "test_size"
|
|
37
37
|
_VALIDATION_SIZE_KEY = "validation_size"
|
38
38
|
_IDX_USR_ATTR_KEY = "idx"
|
39
39
|
_DT_COLUMN_KEY = "dt_column"
|
40
|
+
_BAD_OUTPUT = -1.0
|
41
|
+
|
42
|
+
|
43
|
+
def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
|
44
|
+
for i in range(len(bins) - 1):
|
45
|
+
if bins[i] <= timestamp < bins[i + 1]:
|
46
|
+
return i
|
47
|
+
return len(bins) - 2 # Assign to last bin if at the end
|
40
48
|
|
41
49
|
|
42
50
|
class Trainer(Fit):
|
@@ -202,6 +210,7 @@ class Trainer(Fit):
|
|
202
210
|
folder = os.path.join(
|
203
211
|
self._folder, str(y_series.name), split_idx.isoformat()
|
204
212
|
)
|
213
|
+
new_folder = os.path.exists(folder)
|
205
214
|
os.makedirs(folder, exist_ok=True)
|
206
215
|
trial_file = os.path.join(folder, _TRIAL_FILENAME)
|
207
216
|
if os.path.exists(trial_file):
|
@@ -227,8 +236,10 @@ class Trainer(Fit):
|
|
227
236
|
x_train = windower.fit_transform(x_train)
|
228
237
|
y_train = y_train[-len(x_train) :]
|
229
238
|
if len(y_train.unique()) <= 1:
|
239
|
+
if new_folder:
|
240
|
+
os.removedirs(folder)
|
230
241
|
logging.warning("Y train only contains 1 unique datapoint.")
|
231
|
-
return
|
242
|
+
return _BAD_OUTPUT
|
232
243
|
|
233
244
|
# Perform common reductions
|
234
245
|
reducer = CombinedReducer()
|
@@ -290,7 +301,9 @@ class Trainer(Fit):
|
|
290
301
|
return output
|
291
302
|
except WavetrainException as exc:
|
292
303
|
logging.warning(str(exc))
|
293
|
-
|
304
|
+
if new_folder:
|
305
|
+
os.removedirs(folder)
|
306
|
+
return _BAD_OUTPUT
|
294
307
|
|
295
308
|
start_validation_index = (
|
296
309
|
dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
|
@@ -332,6 +345,15 @@ class Trainer(Fit):
|
|
332
345
|
if self._max_train_timeout is None
|
333
346
|
else self._max_train_timeout.total_seconds(),
|
334
347
|
)
|
348
|
+
while study.best_trial.value is None or study.best_trial.value != _BAD_OUTPUT:
|
349
|
+
study.optimize(
|
350
|
+
test_objective,
|
351
|
+
n_trials=1,
|
352
|
+
show_progress_bar=True,
|
353
|
+
timeout=None
|
354
|
+
if self._max_train_timeout is None
|
355
|
+
else self._max_train_timeout.total_seconds(),
|
356
|
+
)
|
335
357
|
|
336
358
|
train_len = len(df[dt_index < start_test_index])
|
337
359
|
test_len = len(
|
@@ -426,12 +448,6 @@ class Trainer(Fit):
|
|
426
448
|
+ [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
|
427
449
|
)
|
428
450
|
|
429
|
-
def assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
|
430
|
-
for i in range(len(bins) - 1):
|
431
|
-
if bins[i] <= timestamp < bins[i + 1]:
|
432
|
-
return i
|
433
|
-
return len(bins) - 2 # Assign to last bin if at the end
|
434
|
-
|
435
451
|
def perform_predictions(
|
436
452
|
group: pd.DataFrame,
|
437
453
|
column_path: str,
|
@@ -471,7 +487,7 @@ class Trainer(Fit):
|
|
471
487
|
|
472
488
|
old_index = dt_index.copy()
|
473
489
|
df = df.groupby(
|
474
|
-
dt_index.map(functools.partial(assign_bin, bins=bins))
|
490
|
+
dt_index.map(functools.partial(_assign_bin, bins=bins))
|
475
491
|
).progress_apply( # type: ignore
|
476
492
|
functools.partial(
|
477
493
|
perform_predictions,
|
@@ -489,3 +505,21 @@ class Trainer(Fit):
|
|
489
505
|
df[col] = input_df[col]
|
490
506
|
|
491
507
|
return df
|
508
|
+
|
509
|
+
def feature_importances(self) -> dict[str, dict[str, float]]:
|
510
|
+
"""Find the feature importances for the rolling models."""
|
511
|
+
feature_importances = {}
|
512
|
+
|
513
|
+
for column in os.listdir(self._folder):
|
514
|
+
column_path = os.path.join(self._folder, column)
|
515
|
+
if not os.path.isdir(column_path):
|
516
|
+
continue
|
517
|
+
for date_str in os.listdir(column_path):
|
518
|
+
date_path = os.path.join(column_path, date_str)
|
519
|
+
if not os.path.isdir(date_path):
|
520
|
+
continue
|
521
|
+
model = ModelRouter()
|
522
|
+
model.load(date_path)
|
523
|
+
feature_importances[date_str] = model.feature_importances
|
524
|
+
|
525
|
+
return feature_importances
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|