wavetrainer 0.0.28__tar.gz → 0.0.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.0.28/wavetrainer.egg-info → wavetrainer-0.0.30}/PKG-INFO +1 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/setup.py +1 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/trainer_test.py +1 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_model.py +11 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model.py +7 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model_router.py +7 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/tabpfn_model.py +4 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/trainer.py +60 -18
- {wavetrainer-0.0.28 → wavetrainer-0.0.30/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/LICENSE +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/MANIFEST.in +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/README.md +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/requirements.txt +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/setup.cfg +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/model/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/create.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_kwargs.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/params.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
23
23
|
|
24
24
|
setup(
|
25
25
|
name='wavetrainer',
|
26
|
-
version='0.0.28',
|
26
|
+
version='0.0.30',
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
28
28
|
long_description=long_description,
|
29
29
|
long_description_content_type='text/markdown',
|
@@ -72,6 +72,16 @@ class CatboostModel(Model):
|
|
72
72
|
def supports_importances(self) -> bool:
|
73
73
|
return True
|
74
74
|
|
75
|
+
@property
|
76
|
+
def feature_importances(self) -> dict[str, float]:
|
77
|
+
catboost = self._provide_catboost()
|
78
|
+
importances = catboost.get_feature_importance(prettified=True)
|
79
|
+
if importances is None:
|
80
|
+
raise ValueError("importances is null")
|
81
|
+
feature_ids = importances["Feature Id"].to_list() # type: ignore
|
82
|
+
importances = importances["Importances"].to_list() # type: ignore
|
83
|
+
return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
|
84
|
+
|
75
85
|
def pre_fit(
|
76
86
|
self,
|
77
87
|
df: pd.DataFrame,
|
@@ -165,7 +175,7 @@ class CatboostModel(Model):
|
|
165
175
|
label=eval_y,
|
166
176
|
cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
|
167
177
|
)
|
168
|
-
if eval_x is not None
|
178
|
+
if eval_x is not None and self._best_iteration is not None
|
169
179
|
else None
|
170
180
|
)
|
171
181
|
catboost.fit(
|
@@ -37,6 +37,13 @@ class Model(Params, Fit):
|
|
37
37
|
"supports_importances not implemented in parent class."
|
38
38
|
)
|
39
39
|
|
40
|
+
@property
|
41
|
+
def feature_importances(self) -> dict[str, float]:
|
42
|
+
"""The feature importances of this model."""
|
43
|
+
raise NotImplementedError(
|
44
|
+
"feature_importances not implemented in parent class."
|
45
|
+
)
|
46
|
+
|
40
47
|
def pre_fit(
|
41
48
|
self,
|
42
49
|
df: pd.DataFrame,
|
@@ -52,6 +52,13 @@ class ModelRouter(Model):
|
|
52
52
|
raise ValueError("model is null")
|
53
53
|
return model.supports_importances
|
54
54
|
|
55
|
+
@property
|
56
|
+
def feature_importances(self) -> dict[str, float]:
|
57
|
+
model = self._model
|
58
|
+
if model is None:
|
59
|
+
raise ValueError("model is null")
|
60
|
+
return model.feature_importances
|
61
|
+
|
55
62
|
def pre_fit(
|
56
63
|
self,
|
57
64
|
df: pd.DataFrame,
|
@@ -28,6 +28,7 @@ from .windower.windower import Windower
|
|
28
28
|
_SAMPLER_FILENAME = "sampler.pkl"
|
29
29
|
_STUDYDB_FILENAME = "study.db"
|
30
30
|
_PARAMS_FILENAME = "params.json"
|
31
|
+
_TRIAL_FILENAME = "trial.json"
|
31
32
|
_TRIALS_KEY = "trials"
|
32
33
|
_WALKFORWARD_TIMEDELTA_KEY = "walkforward_timedelta"
|
33
34
|
_DAYS_KEY = "days"
|
@@ -38,6 +39,13 @@ _IDX_USR_ATTR_KEY = "idx"
|
|
38
39
|
_DT_COLUMN_KEY = "dt_column"
|
39
40
|
|
40
41
|
|
42
|
+
def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
|
43
|
+
for i in range(len(bins) - 1):
|
44
|
+
if bins[i] <= timestamp < bins[i + 1]:
|
45
|
+
return i
|
46
|
+
return len(bins) - 2 # Assign to last bin if at the end
|
47
|
+
|
48
|
+
|
41
49
|
class Trainer(Fit):
|
42
50
|
"""A class for training and predicting from an array of data."""
|
43
51
|
|
@@ -198,6 +206,20 @@ class Trainer(Fit):
|
|
198
206
|
) -> float:
|
199
207
|
print(f"Beginning trial for: {split_idx.isoformat()}")
|
200
208
|
trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
|
209
|
+
folder = os.path.join(
|
210
|
+
self._folder, str(y_series.name), split_idx.isoformat()
|
211
|
+
)
|
212
|
+
os.makedirs(folder, exist_ok=True)
|
213
|
+
trial_file = os.path.join(folder, _TRIAL_FILENAME)
|
214
|
+
if os.path.exists(trial_file):
|
215
|
+
with open(trial_file, encoding="utf8") as handle:
|
216
|
+
trial_info = json.load(handle)
|
217
|
+
if trial_info["number"] == trial.number:
|
218
|
+
logging.info(
|
219
|
+
"Found trial %d previously executed, skipping...",
|
220
|
+
trial.number,
|
221
|
+
)
|
222
|
+
return trial_info["output"]
|
201
223
|
|
202
224
|
train_dt_index = dt_index[: len(x)]
|
203
225
|
x_train = x[train_dt_index < split_idx] # type: ignore
|
@@ -247,24 +269,32 @@ class Trainer(Fit):
|
|
247
269
|
calibrator.set_options(trial, x)
|
248
270
|
calibrator.fit(x_pred, y=y_train)
|
249
271
|
|
272
|
+
# Output
|
273
|
+
y_pred = model.transform(x_test)
|
274
|
+
y_pred = calibrator.transform(y_pred)
|
275
|
+
output = 0.0
|
276
|
+
if determine_model_type(y_series) == ModelType.REGRESSION:
|
277
|
+
output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
278
|
+
else:
|
279
|
+
output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
|
280
|
+
|
250
281
|
if save:
|
251
|
-
folder = os.path.join(
|
252
|
-
self._folder, str(y_series.name), split_idx.isoformat()
|
253
|
-
)
|
254
|
-
if not os.path.exists(folder):
|
255
|
-
os.mkdir(folder)
|
256
282
|
windower.save(folder, trial)
|
257
283
|
reducer.save(folder, trial)
|
258
284
|
weights.save(folder, trial)
|
259
285
|
model.save(folder, trial)
|
260
286
|
selector.save(folder, trial)
|
261
287
|
calibrator.save(folder, trial)
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
288
|
+
with open(trial_file, "w", encoding="utf8") as handle:
|
289
|
+
json.dump(
|
290
|
+
{
|
291
|
+
"number": trial.number,
|
292
|
+
"output": output,
|
293
|
+
},
|
294
|
+
handle,
|
295
|
+
)
|
296
|
+
|
297
|
+
return output
|
268
298
|
except WavetrainException as exc:
|
269
299
|
logging.warning(str(exc))
|
270
300
|
return -1.0
|
@@ -403,12 +433,6 @@ class Trainer(Fit):
|
|
403
433
|
+ [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
|
404
434
|
)
|
405
435
|
|
406
|
-
def assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
|
407
|
-
for i in range(len(bins) - 1):
|
408
|
-
if bins[i] <= timestamp < bins[i + 1]:
|
409
|
-
return i
|
410
|
-
return len(bins) - 2 # Assign to last bin if at the end
|
411
|
-
|
412
436
|
def perform_predictions(
|
413
437
|
group: pd.DataFrame,
|
414
438
|
column_path: str,
|
@@ -448,7 +472,7 @@ class Trainer(Fit):
|
|
448
472
|
|
449
473
|
old_index = dt_index.copy()
|
450
474
|
df = df.groupby(
|
451
|
-
dt_index.map(functools.partial(assign_bin, bins=bins))
|
475
|
+
dt_index.map(functools.partial(_assign_bin, bins=bins))
|
452
476
|
).progress_apply( # type: ignore
|
453
477
|
functools.partial(
|
454
478
|
perform_predictions,
|
@@ -466,3 +490,21 @@ class Trainer(Fit):
|
|
466
490
|
df[col] = input_df[col]
|
467
491
|
|
468
492
|
return df
|
493
|
+
|
494
|
+
def feature_importances(self) -> dict[str, dict[str, float]]:
|
495
|
+
"""Find the feature importances for the rolling models."""
|
496
|
+
feature_importances = {}
|
497
|
+
|
498
|
+
for column in os.listdir(self._folder):
|
499
|
+
column_path = os.path.join(self._folder, column)
|
500
|
+
if not os.path.isdir(column_path):
|
501
|
+
continue
|
502
|
+
for date_str in os.listdir(column_path):
|
503
|
+
date_path = os.path.join(column_path, date_str)
|
504
|
+
if not os.path.isdir(date_path):
|
505
|
+
continue
|
506
|
+
model = ModelRouter()
|
507
|
+
model.load(date_path)
|
508
|
+
feature_importances[date_str] = model.feature_importances
|
509
|
+
|
510
|
+
return feature_importances
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|