wavetrainer 0.0.38__tar.gz → 0.0.39__tar.gz

This diff shows the changes between two publicly released package versions. The information in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (62)
  1. {wavetrainer-0.0.38/wavetrainer.egg-info → wavetrainer-0.0.39}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/setup.py +1 -1
  3. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_model.py +3 -2
  5. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/combined_reducer.py +6 -1
  6. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +6 -3
  7. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/trainer.py +17 -3
  8. {wavetrainer-0.0.38 → wavetrainer-0.0.39/wavetrainer.egg-info}/PKG-INFO +1 -1
  9. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/LICENSE +0 -0
  10. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/MANIFEST.in +0 -0
  11. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/README.md +0 -0
  12. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/requirements.txt +0 -0
  13. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/setup.cfg +0 -0
  14. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/__init__.py +0 -0
  15. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/model/__init__.py +0 -0
  16. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/model/catboost_kwargs_test.py +0 -0
  17. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/tests/trainer_test.py +0 -0
  18. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/__init__.py +0 -0
  19. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/calibrator.py +0 -0
  20. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/calibrator_router.py +0 -0
  21. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  22. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  23. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/create.py +0 -0
  24. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/exceptions.py +0 -0
  25. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/fit.py +0 -0
  26. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/__init__.py +0 -0
  27. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  28. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_kwargs.py +0 -0
  29. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/model.py +0 -0
  31. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/model_router.py +0 -0
  32. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model/tabpfn_model.py +0 -0
  33. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/model_type.py +0 -0
  34. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/params.py +0 -0
  35. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/__init__.py +0 -0
  36. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  37. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/constant_reducer.py +0 -0
  38. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/correlation_reducer.py +0 -0
  39. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  40. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  41. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  42. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/reducer.py +0 -0
  43. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  44. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/reducer/unseen_reducer.py +0 -0
  45. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/selector/__init__.py +0 -0
  46. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/selector/selector.py +0 -0
  47. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/__init__.py +0 -0
  48. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/class_weights.py +0 -0
  49. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/combined_weights.py +0 -0
  50. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/exponential_weights.py +0 -0
  51. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/linear_weights.py +0 -0
  52. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/noop_weights.py +0 -0
  53. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/sigmoid_weights.py +0 -0
  54. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/weights.py +0 -0
  55. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/weights/weights_router.py +0 -0
  56. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/windower/__init__.py +0 -0
  57. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer/windower/windower.py +0 -0
  58. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/SOURCES.txt +0 -0
  59. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/dependency_links.txt +0 -0
  60. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/not-zip-safe +0 -0
  61. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/requires.txt +0 -0
  62. {wavetrainer-0.0.38 → wavetrainer-0.0.39}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.38
3
+ Version: 0.0.39
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.38',
26
+ version='0.0.39',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.38"
5
+ __VERSION__ = "0.0.39"
6
6
  __all__ = ("create",)
@@ -148,7 +148,7 @@ class CatboostModel(Model):
148
148
  )
149
149
  catboost = self._provide_catboost()
150
150
  catboost.save_model(os.path.join(folder, _MODEL_FILENAME))
151
- trial.user_attrs[_BEST_ITERATION_KEY] = self._best_iteration
151
+ trial.set_user_attr(_BEST_ITERATION_KEY, self._best_iteration)
152
152
 
153
153
  def fit(
154
154
  self,
@@ -219,9 +219,10 @@ class CatboostModel(Model):
219
219
  best_iteration if best_iteration is not None else self._iterations
220
220
  )
221
221
  logging.info(
222
- "Creating catboost model with depth %d, boosting type %s",
222
+ "Creating catboost model with depth %d, boosting type %s, best iteration %d",
223
223
  self._depth,
224
224
  self._boosting_type,
225
+ best_iteration,
225
226
  )
226
227
  match self._model_type:
227
228
  case ModelType.BINARY:
@@ -3,6 +3,7 @@
3
3
  import json
4
4
  import logging
5
5
  import os
6
+ import time
6
7
  from typing import Self
7
8
 
8
9
  import optuna
@@ -37,7 +38,7 @@ class CombinedReducer(Reducer):
37
38
  DuplicateReducer(),
38
39
  CorrelationReducer(),
39
40
  SmartCorrelationReducer(),
40
- SelectBySingleFeaturePerformanceReducer(),
41
+ # SelectBySingleFeaturePerformanceReducer(),
41
42
  ]
42
43
  self._folder = None
43
44
 
@@ -99,12 +100,16 @@ class CombinedReducer(Reducer):
99
100
  ) -> Self:
100
101
  removed_columns_dict = {}
101
102
  for reducer in self._reducers:
103
+ start_reducer = time.time()
102
104
  before_columns = set(df.columns.values)
103
105
  df = reducer.fit_transform(df, y=y)
104
106
  after_columns = set(df.columns.values)
105
107
  removed_columns = before_columns.difference(after_columns)
106
108
  if removed_columns:
107
109
  removed_columns_dict[reducer.name()] = list(removed_columns)
110
+ logging.info(
111
+ "%s reducer took %f", reducer.name(), time.time() - start_reducer
112
+ )
108
113
  if self._folder is not None:
109
114
  with open(
110
115
  os.path.join(self._folder, _REMOVED_COLUMNS_FILE), encoding="utf8"
@@ -1,5 +1,6 @@
1
1
  """A reducer that removes features by their single performance via further heuristics."""
2
2
 
3
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
3
4
  from typing import Self
4
5
 
5
6
  import optuna
@@ -7,8 +8,8 @@ import pandas as pd
7
8
  from feature_engine.selection import SelectBySingleFeaturePerformance
8
9
  from sklearn.ensemble import RandomForestClassifier # type: ignore
9
10
 
10
- from .base_selector_reducer import BaseSelectorReducer
11
11
  from ..model_type import ModelType, determine_model_type
12
+ from .base_selector_reducer import BaseSelectorReducer
12
13
 
13
14
  _SINGLE_FEATURE_PERFORMANCE_REDUCER_FILENAME = (
14
15
  "single_feature_performance_reducer.joblib"
@@ -23,7 +24,7 @@ class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
23
24
 
24
25
  def __init__(self) -> None:
25
26
  self._singlefeatureperformance_selector = SelectBySingleFeaturePerformance(
26
- RandomForestClassifier(random_state=42), scoring="accuracy"
27
+ RandomForestClassifier(random_state=42, n_jobs=-1), scoring="accuracy", cv=1
27
28
  )
28
29
  super().__init__(
29
30
  self._singlefeatureperformance_selector,
@@ -53,5 +54,7 @@ class SelectBySingleFeaturePerformanceReducer(BaseSelectorReducer):
53
54
  eval_x: pd.DataFrame | None = None,
54
55
  eval_y: pd.Series | pd.DataFrame | None = None,
55
56
  ) -> Self:
56
- self._singlefeatureperformance_selector.scoring = "r2" if determine_model_type(y) == ModelType.REGRESSION else "accuracy"
57
+ self._singlefeatureperformance_selector.scoring = (
58
+ "r2" if determine_model_type(y) == ModelType.REGRESSION else "accuracy" # type: ignore
59
+ )
57
60
  return super().fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
@@ -6,6 +6,7 @@ import json
6
6
  import logging
7
7
  import os
8
8
  import pickle
9
+ import time
9
10
  from typing import Self
10
11
 
11
12
  import optuna
@@ -231,6 +232,7 @@ class Trainer(Fit):
231
232
 
232
233
  try:
233
234
  # Window the data
235
+ start_windower = time.time()
234
236
  windower = Windower(self._dt_column)
235
237
  windower.set_options(trial, x)
236
238
  x_train = windower.fit_transform(x_train)
@@ -240,25 +242,31 @@ class Trainer(Fit):
240
242
  os.removedirs(folder)
241
243
  logging.warning("Y train only contains 1 unique datapoint.")
242
244
  return _BAD_OUTPUT
245
+ logging.info("Windowing took %f", time.time() - start_windower)
243
246
 
244
247
  # Perform common reductions
248
+ start_reducer = time.time()
245
249
  reducer = CombinedReducer()
246
250
  reducer.set_options(trial, x)
247
251
  x_train = reducer.fit_transform(x_train, y=y_train)
248
252
  x_test = reducer.transform(x_test)
253
+ logging.info("Reducing took %f", time.time() - start_reducer)
249
254
 
250
255
  # Calculate the row weights
256
+ start_row_weights = time.time()
251
257
  weights = CombinedWeights()
252
258
  weights.set_options(trial, x)
253
259
  w = weights.fit(x_train, y=y_train).transform(y_train.to_frame())[
254
260
  WEIGHTS_COLUMN
255
261
  ]
262
+ logging.info("Row weights took %f", time.time() - start_row_weights)
256
263
 
257
264
  # Create model
258
265
  model = ModelRouter()
259
266
  model.set_options(trial, x)
260
267
 
261
268
  # Train
269
+ start_train = time.time()
262
270
  selector = Selector(model)
263
271
  selector.set_options(trial, x)
264
272
  selector.fit(x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test)
@@ -267,11 +275,14 @@ class Trainer(Fit):
267
275
  x_pred = model.fit_transform(
268
276
  x_train, y=y_train, w=w, eval_x=x_test, eval_y=y_test
269
277
  )
278
+ logging.info("Training took %f", time.time() - start_train)
270
279
 
271
280
  # Calibrate
281
+ start_calibrate = time.time()
272
282
  calibrator = CalibratorRouter(model)
273
283
  calibrator.set_options(trial, x)
274
284
  calibrator.fit(x_pred, y=y_train)
285
+ logging.info("Calibrating took %f", time.time() - start_calibrate)
275
286
 
276
287
  # Output
277
288
  y_pred = model.transform(x_test)
@@ -521,8 +532,11 @@ class Trainer(Fit):
521
532
  date_path = os.path.join(column_path, date_str)
522
533
  if not os.path.isdir(date_path):
523
534
  continue
524
- model = ModelRouter()
525
- model.load(date_path)
526
- feature_importances[date_str] = model.feature_importances
535
+ try:
536
+ model = ModelRouter()
537
+ model.load(date_path)
538
+ feature_importances[date_str] = model.feature_importances
539
+ except FileNotFoundError as exc:
540
+ logging.warning(str(exc))
527
541
 
528
542
  return feature_importances
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.38
3
+ Version: 0.0.39
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes