wavetrainer 0.0.29__tar.gz → 0.0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {wavetrainer-0.0.29/wavetrainer.egg-info → wavetrainer-0.0.31}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/setup.py +1 -1
  3. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/trainer_test.py +1 -1
  4. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/__init__.py +1 -1
  5. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_model.py +11 -1
  6. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/model.py +7 -0
  7. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/model_router.py +7 -0
  8. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/tabpfn_model.py +4 -0
  9. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/combined_reducer.py +9 -0
  10. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/trainer.py +43 -9
  11. {wavetrainer-0.0.29 → wavetrainer-0.0.31/wavetrainer.egg-info}/PKG-INFO +1 -1
  12. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/LICENSE +0 -0
  13. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/MANIFEST.in +0 -0
  14. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/README.md +0 -0
  15. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/requirements.txt +0 -0
  16. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/setup.cfg +0 -0
  17. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/__init__.py +0 -0
  18. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/model/__init__.py +0 -0
  19. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/tests/model/catboost_kwargs_test.py +0 -0
  20. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/__init__.py +0 -0
  21. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/calibrator.py +0 -0
  22. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/calibrator_router.py +0 -0
  23. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  24. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  25. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/create.py +0 -0
  26. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/exceptions.py +0 -0
  27. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/fit.py +0 -0
  28. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/__init__.py +0 -0
  29. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  30. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_kwargs.py +0 -0
  31. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  32. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/model_type.py +0 -0
  33. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/params.py +0 -0
  34. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/__init__.py +0 -0
  35. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  36. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/constant_reducer.py +0 -0
  37. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/correlation_reducer.py +0 -0
  38. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  39. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  40. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/reducer.py +0 -0
  41. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  42. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/reducer/unseen_reducer.py +0 -0
  43. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/selector/__init__.py +0 -0
  44. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/selector/selector.py +0 -0
  45. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/__init__.py +0 -0
  46. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/class_weights.py +0 -0
  47. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/combined_weights.py +0 -0
  48. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/exponential_weights.py +0 -0
  49. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/linear_weights.py +0 -0
  50. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/noop_weights.py +0 -0
  51. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/sigmoid_weights.py +0 -0
  52. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/weights.py +0 -0
  53. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/weights/weights_router.py +0 -0
  54. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/windower/__init__.py +0 -0
  55. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer/windower/windower.py +0 -0
  56. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/SOURCES.txt +0 -0
  57. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/dependency_links.txt +0 -0
  58. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/not-zip-safe +0 -0
  59. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/requires.txt +0 -0
  60. {wavetrainer-0.0.29 → wavetrainer-0.0.31}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.29
3
+ Version: 0.0.31
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.29',
26
+ version='0.0.31',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -58,4 +58,4 @@ class TestTrainer(unittest.TestCase):
58
58
  trainer.fit(df, y=y)
59
59
  df = trainer.transform(df)
60
60
  print("df:")
61
- print(df)
61
+ print(df)
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.29"
5
+ __VERSION__ = "0.0.31"
6
6
  __all__ = ("create",)
@@ -72,6 +72,16 @@ class CatboostModel(Model):
72
72
  def supports_importances(self) -> bool:
73
73
  return True
74
74
 
75
+ @property
76
+ def feature_importances(self) -> dict[str, float]:
77
+ catboost = self._provide_catboost()
78
+ importances = catboost.get_feature_importance(prettified=True)
79
+ if importances is None:
80
+ raise ValueError("importances is null")
81
+ feature_ids = importances["Feature Id"].to_list() # type: ignore
82
+ importances = importances["Importances"].to_list() # type: ignore
83
+ return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
84
+
75
85
  def pre_fit(
76
86
  self,
77
87
  df: pd.DataFrame,
@@ -165,7 +175,7 @@ class CatboostModel(Model):
165
175
  label=eval_y,
166
176
  cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
167
177
  )
168
- if eval_x is not None
178
+ if eval_x is not None and self._best_iteration is not None
169
179
  else None
170
180
  )
171
181
  catboost.fit(
@@ -37,6 +37,13 @@ class Model(Params, Fit):
37
37
  "supports_importances not implemented in parent class."
38
38
  )
39
39
 
40
+ @property
41
+ def feature_importances(self) -> dict[str, float]:
42
+ """The feature importances of this model."""
43
+ raise NotImplementedError(
44
+ "feature_importances not implemented in parent class."
45
+ )
46
+
40
47
  def pre_fit(
41
48
  self,
42
49
  df: pd.DataFrame,
@@ -52,6 +52,13 @@ class ModelRouter(Model):
52
52
  raise ValueError("model is null")
53
53
  return model.supports_importances
54
54
 
55
+ @property
56
+ def feature_importances(self) -> dict[str, float]:
57
+ model = self._model
58
+ if model is None:
59
+ raise ValueError("model is null")
60
+ return model.feature_importances
61
+
55
62
  def pre_fit(
56
63
  self,
57
64
  df: pd.DataFrame,
@@ -48,6 +48,10 @@ class TabPFNModel(Model):
48
48
  def supports_importances(self) -> bool:
49
49
  return False
50
50
 
51
+ @property
52
+ def feature_importances(self) -> dict[str, float]:
53
+ return {}
54
+
51
55
  def pre_fit(
52
56
  self,
53
57
  df: pd.DataFrame,
@@ -90,7 +90,16 @@ class CombinedReducer(Reducer):
90
90
  eval_y: pd.Series | pd.DataFrame | None = None,
91
91
  ) -> Self:
92
92
  for reducer in self._reducers:
93
+ before_columns = set(df.columns.values)
93
94
  df = reducer.fit_transform(df)
95
+ after_columns = set(df.columns.values)
96
+ removed_columns = before_columns.difference(after_columns)
97
+ if removed_columns:
98
+ logging.info(
99
+ "Removed columns %s using %s",
100
+ ",".join(removed_columns),
101
+ reducer.name(),
102
+ )
94
103
  return self
95
104
 
96
105
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -37,6 +37,14 @@ _TEST_SIZE_KEY = "test_size"
37
37
  _VALIDATION_SIZE_KEY = "validation_size"
38
38
  _IDX_USR_ATTR_KEY = "idx"
39
39
  _DT_COLUMN_KEY = "dt_column"
40
+ _BAD_OUTPUT = -1.0
41
+
42
+
43
+ def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
44
+ for i in range(len(bins) - 1):
45
+ if bins[i] <= timestamp < bins[i + 1]:
46
+ return i
47
+ return len(bins) - 2 # Assign to last bin if at the end
40
48
 
41
49
 
42
50
  class Trainer(Fit):
@@ -202,6 +210,7 @@ class Trainer(Fit):
202
210
  folder = os.path.join(
203
211
  self._folder, str(y_series.name), split_idx.isoformat()
204
212
  )
213
+ new_folder = os.path.exists(folder)
205
214
  os.makedirs(folder, exist_ok=True)
206
215
  trial_file = os.path.join(folder, _TRIAL_FILENAME)
207
216
  if os.path.exists(trial_file):
@@ -227,8 +236,10 @@ class Trainer(Fit):
227
236
  x_train = windower.fit_transform(x_train)
228
237
  y_train = y_train[-len(x_train) :]
229
238
  if len(y_train.unique()) <= 1:
239
+ if new_folder:
240
+ os.removedirs(folder)
230
241
  logging.warning("Y train only contains 1 unique datapoint.")
231
- return -1.0
242
+ return _BAD_OUTPUT
232
243
 
233
244
  # Perform common reductions
234
245
  reducer = CombinedReducer()
@@ -290,7 +301,9 @@ class Trainer(Fit):
290
301
  return output
291
302
  except WavetrainException as exc:
292
303
  logging.warning(str(exc))
293
- return -1.0
304
+ if new_folder:
305
+ os.removedirs(folder)
306
+ return _BAD_OUTPUT
294
307
 
295
308
  start_validation_index = (
296
309
  dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
@@ -332,6 +345,15 @@ class Trainer(Fit):
332
345
  if self._max_train_timeout is None
333
346
  else self._max_train_timeout.total_seconds(),
334
347
  )
348
+ while study.best_trial.value is None or study.best_trial.value != _BAD_OUTPUT:
349
+ study.optimize(
350
+ test_objective,
351
+ n_trials=1,
352
+ show_progress_bar=True,
353
+ timeout=None
354
+ if self._max_train_timeout is None
355
+ else self._max_train_timeout.total_seconds(),
356
+ )
335
357
 
336
358
  train_len = len(df[dt_index < start_test_index])
337
359
  test_len = len(
@@ -426,12 +448,6 @@ class Trainer(Fit):
426
448
  + [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
427
449
  )
428
450
 
429
- def assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
430
- for i in range(len(bins) - 1):
431
- if bins[i] <= timestamp < bins[i + 1]:
432
- return i
433
- return len(bins) - 2 # Assign to last bin if at the end
434
-
435
451
  def perform_predictions(
436
452
  group: pd.DataFrame,
437
453
  column_path: str,
@@ -471,7 +487,7 @@ class Trainer(Fit):
471
487
 
472
488
  old_index = dt_index.copy()
473
489
  df = df.groupby(
474
- dt_index.map(functools.partial(assign_bin, bins=bins))
490
+ dt_index.map(functools.partial(_assign_bin, bins=bins))
475
491
  ).progress_apply( # type: ignore
476
492
  functools.partial(
477
493
  perform_predictions,
@@ -489,3 +505,21 @@ class Trainer(Fit):
489
505
  df[col] = input_df[col]
490
506
 
491
507
  return df
508
+
509
+ def feature_importances(self) -> dict[str, dict[str, float]]:
510
+ """Find the feature importances for the rolling models."""
511
+ feature_importances = {}
512
+
513
+ for column in os.listdir(self._folder):
514
+ column_path = os.path.join(self._folder, column)
515
+ if not os.path.isdir(column_path):
516
+ continue
517
+ for date_str in os.listdir(column_path):
518
+ date_path = os.path.join(column_path, date_str)
519
+ if not os.path.isdir(date_path):
520
+ continue
521
+ model = ModelRouter()
522
+ model.load(date_path)
523
+ feature_importances[date_str] = model.feature_importances
524
+
525
+ return feature_importances
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.29
3
+ Version: 0.0.31
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes