wavetrainer 0.0.28__tar.gz → 0.0.30__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (60)
  1. {wavetrainer-0.0.28/wavetrainer.egg-info → wavetrainer-0.0.30}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/setup.py +1 -1
  3. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/trainer_test.py +1 -1
  4. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/__init__.py +1 -1
  5. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_model.py +11 -1
  6. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model.py +7 -0
  7. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/model_router.py +7 -0
  8. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/tabpfn_model.py +4 -0
  9. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/trainer.py +60 -18
  10. {wavetrainer-0.0.28 → wavetrainer-0.0.30/wavetrainer.egg-info}/PKG-INFO +1 -1
  11. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/LICENSE +0 -0
  12. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/MANIFEST.in +0 -0
  13. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/README.md +0 -0
  14. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/requirements.txt +0 -0
  15. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/setup.cfg +0 -0
  16. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/__init__.py +0 -0
  17. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/model/__init__.py +0 -0
  18. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/tests/model/catboost_kwargs_test.py +0 -0
  19. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/__init__.py +0 -0
  20. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/calibrator.py +0 -0
  21. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/calibrator_router.py +0 -0
  22. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  23. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  24. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/create.py +0 -0
  25. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/exceptions.py +0 -0
  26. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/fit.py +0 -0
  27. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/__init__.py +0 -0
  28. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  29. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_kwargs.py +0 -0
  30. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  31. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/model_type.py +0 -0
  32. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/params.py +0 -0
  33. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/__init__.py +0 -0
  34. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  35. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/combined_reducer.py +0 -0
  36. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/constant_reducer.py +0 -0
  37. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/correlation_reducer.py +0 -0
  38. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  39. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  40. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/reducer.py +0 -0
  41. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  42. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/reducer/unseen_reducer.py +0 -0
  43. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/selector/__init__.py +0 -0
  44. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/selector/selector.py +0 -0
  45. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/__init__.py +0 -0
  46. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/class_weights.py +0 -0
  47. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/combined_weights.py +0 -0
  48. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/exponential_weights.py +0 -0
  49. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/linear_weights.py +0 -0
  50. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/noop_weights.py +0 -0
  51. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/sigmoid_weights.py +0 -0
  52. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/weights.py +0 -0
  53. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/weights/weights_router.py +0 -0
  54. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/windower/__init__.py +0 -0
  55. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer/windower/windower.py +0 -0
  56. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/SOURCES.txt +0 -0
  57. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/dependency_links.txt +0 -0
  58. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/not-zip-safe +0 -0
  59. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/requires.txt +0 -0
  60. {wavetrainer-0.0.28 → wavetrainer-0.0.30}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.28
3
+ Version: 0.0.30
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.28',
26
+ version='0.0.30',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -58,4 +58,4 @@ class TestTrainer(unittest.TestCase):
58
58
  trainer.fit(df, y=y)
59
59
  df = trainer.transform(df)
60
60
  print("df:")
61
- print(df)
61
+ print(df)
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.28"
5
+ __VERSION__ = "0.0.30"
6
6
  __all__ = ("create",)
@@ -72,6 +72,16 @@ class CatboostModel(Model):
72
72
  def supports_importances(self) -> bool:
73
73
  return True
74
74
 
75
+ @property
76
+ def feature_importances(self) -> dict[str, float]:
77
+ catboost = self._provide_catboost()
78
+ importances = catboost.get_feature_importance(prettified=True)
79
+ if importances is None:
80
+ raise ValueError("importances is null")
81
+ feature_ids = importances["Feature Id"].to_list() # type: ignore
82
+ importances = importances["Importances"].to_list() # type: ignore
83
+ return {feature_ids[x]: importances[x] for x in range(len(feature_ids))}
84
+
75
85
  def pre_fit(
76
86
  self,
77
87
  df: pd.DataFrame,
@@ -165,7 +175,7 @@ class CatboostModel(Model):
165
175
  label=eval_y,
166
176
  cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
167
177
  )
168
- if eval_x is not None
178
+ if eval_x is not None and self._best_iteration is not None
169
179
  else None
170
180
  )
171
181
  catboost.fit(
@@ -37,6 +37,13 @@ class Model(Params, Fit):
37
37
  "supports_importances not implemented in parent class."
38
38
  )
39
39
 
40
+ @property
41
+ def feature_importances(self) -> dict[str, float]:
42
+ """The feature importances of this model."""
43
+ raise NotImplementedError(
44
+ "feature_importances not implemented in parent class."
45
+ )
46
+
40
47
  def pre_fit(
41
48
  self,
42
49
  df: pd.DataFrame,
@@ -52,6 +52,13 @@ class ModelRouter(Model):
52
52
  raise ValueError("model is null")
53
53
  return model.supports_importances
54
54
 
55
+ @property
56
+ def feature_importances(self) -> dict[str, float]:
57
+ model = self._model
58
+ if model is None:
59
+ raise ValueError("model is null")
60
+ return model.feature_importances
61
+
55
62
  def pre_fit(
56
63
  self,
57
64
  df: pd.DataFrame,
@@ -48,6 +48,10 @@ class TabPFNModel(Model):
48
48
  def supports_importances(self) -> bool:
49
49
  return False
50
50
 
51
+ @property
52
+ def feature_importances(self) -> dict[str, float]:
53
+ return {}
54
+
51
55
  def pre_fit(
52
56
  self,
53
57
  df: pd.DataFrame,
@@ -28,6 +28,7 @@ from .windower.windower import Windower
28
28
  _SAMPLER_FILENAME = "sampler.pkl"
29
29
  _STUDYDB_FILENAME = "study.db"
30
30
  _PARAMS_FILENAME = "params.json"
31
+ _TRIAL_FILENAME = "trial.json"
31
32
  _TRIALS_KEY = "trials"
32
33
  _WALKFORWARD_TIMEDELTA_KEY = "walkforward_timedelta"
33
34
  _DAYS_KEY = "days"
@@ -38,6 +39,13 @@ _IDX_USR_ATTR_KEY = "idx"
38
39
  _DT_COLUMN_KEY = "dt_column"
39
40
 
40
41
 
42
+ def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
43
+ for i in range(len(bins) - 1):
44
+ if bins[i] <= timestamp < bins[i + 1]:
45
+ return i
46
+ return len(bins) - 2 # Assign to last bin if at the end
47
+
48
+
41
49
  class Trainer(Fit):
42
50
  """A class for training and predicting from an array of data."""
43
51
 
@@ -198,6 +206,20 @@ class Trainer(Fit):
198
206
  ) -> float:
199
207
  print(f"Beginning trial for: {split_idx.isoformat()}")
200
208
  trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
209
+ folder = os.path.join(
210
+ self._folder, str(y_series.name), split_idx.isoformat()
211
+ )
212
+ os.makedirs(folder, exist_ok=True)
213
+ trial_file = os.path.join(folder, _TRIAL_FILENAME)
214
+ if os.path.exists(trial_file):
215
+ with open(trial_file, encoding="utf8") as handle:
216
+ trial_info = json.load(handle)
217
+ if trial_info["number"] == trial.number:
218
+ logging.info(
219
+ "Found trial %d previously executed, skipping...",
220
+ trial.number,
221
+ )
222
+ return trial_info["output"]
201
223
 
202
224
  train_dt_index = dt_index[: len(x)]
203
225
  x_train = x[train_dt_index < split_idx] # type: ignore
@@ -247,24 +269,32 @@ class Trainer(Fit):
247
269
  calibrator.set_options(trial, x)
248
270
  calibrator.fit(x_pred, y=y_train)
249
271
 
272
+ # Output
273
+ y_pred = model.transform(x_test)
274
+ y_pred = calibrator.transform(y_pred)
275
+ output = 0.0
276
+ if determine_model_type(y_series) == ModelType.REGRESSION:
277
+ output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
278
+ else:
279
+ output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
280
+
250
281
  if save:
251
- folder = os.path.join(
252
- self._folder, str(y_series.name), split_idx.isoformat()
253
- )
254
- if not os.path.exists(folder):
255
- os.mkdir(folder)
256
282
  windower.save(folder, trial)
257
283
  reducer.save(folder, trial)
258
284
  weights.save(folder, trial)
259
285
  model.save(folder, trial)
260
286
  selector.save(folder, trial)
261
287
  calibrator.save(folder, trial)
262
-
263
- y_pred = model.transform(x_test)
264
- y_pred = calibrator.transform(y_pred)
265
- if determine_model_type(y_series) == ModelType.REGRESSION:
266
- return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
267
- return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
288
+ with open(trial_file, "w", encoding="utf8") as handle:
289
+ json.dump(
290
+ {
291
+ "number": trial.number,
292
+ "output": output,
293
+ },
294
+ handle,
295
+ )
296
+
297
+ return output
268
298
  except WavetrainException as exc:
269
299
  logging.warning(str(exc))
270
300
  return -1.0
@@ -403,12 +433,6 @@ class Trainer(Fit):
403
433
  + [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
404
434
  )
405
435
 
406
- def assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
407
- for i in range(len(bins) - 1):
408
- if bins[i] <= timestamp < bins[i + 1]:
409
- return i
410
- return len(bins) - 2 # Assign to last bin if at the end
411
-
412
436
  def perform_predictions(
413
437
  group: pd.DataFrame,
414
438
  column_path: str,
@@ -448,7 +472,7 @@ class Trainer(Fit):
448
472
 
449
473
  old_index = dt_index.copy()
450
474
  df = df.groupby(
451
- dt_index.map(functools.partial(assign_bin, bins=bins))
475
+ dt_index.map(functools.partial(_assign_bin, bins=bins))
452
476
  ).progress_apply( # type: ignore
453
477
  functools.partial(
454
478
  perform_predictions,
@@ -466,3 +490,21 @@ class Trainer(Fit):
466
490
  df[col] = input_df[col]
467
491
 
468
492
  return df
493
+
494
+ def feature_importances(self) -> dict[str, dict[str, float]]:
495
+ """Find the feature importances for the rolling models."""
496
+ feature_importances = {}
497
+
498
+ for column in os.listdir(self._folder):
499
+ column_path = os.path.join(self._folder, column)
500
+ if not os.path.isdir(column_path):
501
+ continue
502
+ for date_str in os.listdir(column_path):
503
+ date_path = os.path.join(column_path, date_str)
504
+ if not os.path.isdir(date_path):
505
+ continue
506
+ model = ModelRouter()
507
+ model.load(date_path)
508
+ feature_importances[date_str] = model.feature_importances
509
+
510
+ return feature_importances
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.28
3
+ Version: 0.0.30
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes