wavetrainer 0.0.8__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {wavetrainer-0.0.8/wavetrainer.egg-info → wavetrainer-0.0.10}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/setup.py +1 -1
  3. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/trainer_test.py +22 -0
  4. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/__init__.py +1 -1
  5. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_model.py +18 -7
  6. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/base_selector_reducer.py +13 -1
  7. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/correlation_reducer.py +4 -0
  8. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/selector/selector.py +4 -0
  9. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/trainer.py +31 -10
  10. {wavetrainer-0.0.8 → wavetrainer-0.0.10/wavetrainer.egg-info}/PKG-INFO +1 -1
  11. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/LICENSE +0 -0
  12. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/MANIFEST.in +0 -0
  13. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/README.md +0 -0
  14. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/requirements.txt +0 -0
  15. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/setup.cfg +0 -0
  16. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/__init__.py +0 -0
  17. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/model/__init__.py +0 -0
  18. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/tests/model/catboost_kwargs_test.py +0 -0
  19. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/__init__.py +0 -0
  20. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/calibrator.py +0 -0
  21. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/calibrator_router.py +0 -0
  22. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  23. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  24. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/create.py +0 -0
  25. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/exceptions.py +0 -0
  26. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/fit.py +0 -0
  27. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/__init__.py +0 -0
  28. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  29. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_kwargs.py +0 -0
  30. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  31. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/model.py +0 -0
  32. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model/model_router.py +0 -0
  33. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/model_type.py +0 -0
  34. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/params.py +0 -0
  35. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/__init__.py +0 -0
  36. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/combined_reducer.py +1 -1
  37. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/constant_reducer.py +0 -0
  38. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  39. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  40. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/reducer/reducer.py +0 -0
  41. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/selector/__init__.py +0 -0
  42. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/__init__.py +0 -0
  43. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/class_weights.py +0 -0
  44. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/combined_weights.py +0 -0
  45. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/exponential_weights.py +0 -0
  46. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/linear_weights.py +0 -0
  47. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/noop_weights.py +0 -0
  48. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/sigmoid_weights.py +0 -0
  49. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/weights.py +0 -0
  50. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/weights/weights_router.py +0 -0
  51. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/windower/__init__.py +0 -0
  52. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer/windower/windower.py +0 -0
  53. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer.egg-info/SOURCES.txt +0 -0
  54. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer.egg-info/dependency_links.txt +0 -0
  55. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer.egg-info/not-zip-safe +0 -0
  56. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer.egg-info/requires.txt +0 -0
  57. {wavetrainer-0.0.8 → wavetrainer-0.0.10}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.8',
26
+ version='0.0.10',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -37,3 +37,25 @@ class TestTrainer(unittest.TestCase):
37
37
  df = trainer.transform(df)
38
38
  print("df:")
39
39
  print(df)
40
+
41
+ def test_trainer_dt_column(self):
42
+ with tempfile.TemporaryDirectory() as tmpdir:
43
+ trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1, dt_column="dt_column")
44
+ x_data = [i for i in range(100)]
45
+ x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
46
+ df = pd.DataFrame(
47
+ data={
48
+ "column1": x_data,
49
+ "dt_column": x_index,
50
+ },
51
+ )
52
+ y = pd.DataFrame(
53
+ data={
54
+ "y": [x % 2 == 0 for x in x_data],
55
+ },
56
+ index=df.index,
57
+ )
58
+ trainer.fit(df, y=y)
59
+ df = trainer.transform(df)
60
+ print("df:")
61
+ print(df)
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.8"
5
+ __VERSION__ = "0.0.10"
6
6
  __all__ = ("create",)
@@ -23,12 +23,13 @@ _DEPTH_KEY = "depth"
23
23
  _L2_LEAF_REG_KEY = "l2_leaf_reg"
24
24
  _BOOSTING_TYPE_KEY = "boosting_type"
25
25
  _MODEL_TYPE_KEY = "model_type"
26
+ _EARLY_STOPPING_ROUNDS = "early_stopping_rounds"
26
27
 
27
28
 
28
29
  class CatboostModel(Model):
29
30
  """A class that uses Catboost as a model."""
30
31
 
31
- # pylint: disable=too-many-positional-arguments,too-many-arguments
32
+ # pylint: disable=too-many-positional-arguments,too-many-arguments,too-many-instance-attributes
32
33
 
33
34
  _catboost: CatBoost | None
34
35
  _iterations: None | int
@@ -37,6 +38,7 @@ class CatboostModel(Model):
37
38
  _l2_leaf_reg: None | float
38
39
  _boosting_type: None | str
39
40
  _model_type: None | ModelType
41
+ _early_stopping_rounds: None | int
40
42
 
41
43
  @classmethod
42
44
  def name(cls) -> str:
@@ -51,6 +53,7 @@ class CatboostModel(Model):
51
53
  self._l2_leaf_reg = None
52
54
  self._boosting_type = None
53
55
  self._model_type = None
56
+ self._early_stopping_rounds = None
54
57
 
55
58
  @property
56
59
  def estimator(self) -> Any:
@@ -80,6 +83,9 @@ class CatboostModel(Model):
80
83
  self._boosting_type = trial.suggest_categorical(
81
84
  _BOOSTING_TYPE_KEY, ["Ordered", "Plain"]
82
85
  )
86
+ self._early_stopping_rounds = trial.suggest_int(
87
+ _EARLY_STOPPING_ROUNDS, 10, 1000
88
+ )
83
89
 
84
90
  def load(self, folder: str) -> None:
85
91
  with open(
@@ -92,6 +98,7 @@ class CatboostModel(Model):
92
98
  self._l2_leaf_reg = params[_L2_LEAF_REG_KEY]
93
99
  self._boosting_type = params[_BOOSTING_TYPE_KEY]
94
100
  self._model_type = ModelType(params[_MODEL_TYPE_KEY])
101
+ self._early_stopping_rounds = params[_EARLY_STOPPING_ROUNDS]
95
102
  catboost = self._provide_catboost()
96
103
  catboost.load_model(os.path.join(folder, _MODEL_FILENAME))
97
104
 
@@ -107,6 +114,7 @@ class CatboostModel(Model):
107
114
  _L2_LEAF_REG_KEY: self._l2_leaf_reg,
108
115
  _BOOSTING_TYPE_KEY: self._boosting_type,
109
116
  _MODEL_TYPE_KEY: str(self._model_type),
117
+ _EARLY_STOPPING_ROUNDS: self._early_stopping_rounds,
110
118
  },
111
119
  handle,
112
120
  )
@@ -141,7 +149,7 @@ class CatboostModel(Model):
141
149
  )
142
150
  catboost.fit(
143
151
  train_pool,
144
- early_stopping_rounds=100,
152
+ early_stopping_rounds=self._early_stopping_rounds,
145
153
  verbose=False,
146
154
  metric_period=100,
147
155
  eval_set=eval_pool,
@@ -149,7 +157,10 @@ class CatboostModel(Model):
149
157
  return self
150
158
 
151
159
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
152
- pred_pool = Pool(df)
160
+ pred_pool = Pool(
161
+ df,
162
+ cat_features=df.select_dtypes(include="category").columns.tolist(),
163
+ )
153
164
  catboost = self._provide_catboost()
154
165
  pred = catboost.predict(pred_pool)
155
166
  df = pd.DataFrame(
@@ -175,7 +186,7 @@ class CatboostModel(Model):
175
186
  depth=self._depth,
176
187
  l2_leaf_reg=self._l2_leaf_reg,
177
188
  boosting_type=self._boosting_type,
178
- early_stopping_rounds=100,
189
+ early_stopping_rounds=self._early_stopping_rounds,
179
190
  metric_period=100,
180
191
  )
181
192
  case ModelType.REGRESSION:
@@ -185,7 +196,7 @@ class CatboostModel(Model):
185
196
  depth=self._depth,
186
197
  l2_leaf_reg=self._l2_leaf_reg,
187
198
  boosting_type=self._boosting_type,
188
- early_stopping_rounds=100,
199
+ early_stopping_rounds=self._early_stopping_rounds,
189
200
  metric_period=100,
190
201
  )
191
202
  case ModelType.BINNED_BINARY:
@@ -195,7 +206,7 @@ class CatboostModel(Model):
195
206
  depth=self._depth,
196
207
  l2_leaf_reg=self._l2_leaf_reg,
197
208
  boosting_type=self._boosting_type,
198
- early_stopping_rounds=100,
209
+ early_stopping_rounds=self._early_stopping_rounds,
199
210
  metric_period=100,
200
211
  )
201
212
  case ModelType.MULTI_CLASSIFICATION:
@@ -205,7 +216,7 @@ class CatboostModel(Model):
205
216
  depth=self._depth,
206
217
  l2_leaf_reg=self._l2_leaf_reg,
207
218
  boosting_type=self._boosting_type,
208
- early_stopping_rounds=100,
219
+ early_stopping_rounds=self._early_stopping_rounds,
209
220
  metric_period=100,
210
221
  )
211
222
  self._catboost = catboost
@@ -1,5 +1,6 @@
1
1
  """A reducer that uses a base selector from the feature engine."""
2
2
 
3
+ import logging
3
4
  import os
4
5
  from typing import Self
5
6
 
@@ -26,6 +27,11 @@ class BaseSelectorReducer(Reducer):
26
27
  def name(cls) -> str:
27
28
  raise NotImplementedError("name not implemented in parent class.")
28
29
 
30
+ @classmethod
31
+ def should_raise(cls) -> bool:
32
+ """Whether the class should raise its exception if it encounters it."""
33
+ return True
34
+
29
35
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
30
36
  pass
31
37
 
@@ -45,11 +51,17 @@ class BaseSelectorReducer(Reducer):
45
51
  eval_x: pd.DataFrame | None = None,
46
52
  eval_y: pd.Series | pd.DataFrame | None = None,
47
53
  ) -> Self:
54
+ if len(df.columns) <= 1:
55
+ return self
48
56
  try:
49
57
  self._base_selector.fit(df) # type: ignore
50
58
  except ValueError as exc:
51
- raise WavetrainException() from exc
59
+ logging.warning(str(exc))
60
+ if self.should_raise():
61
+ raise WavetrainException() from exc
52
62
  return self
53
63
 
54
64
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
65
+ if len(df.columns) <= 1:
66
+ return df
55
67
  return self._base_selector.transform(df)
@@ -19,3 +19,7 @@ class CorrelationReducer(BaseSelectorReducer):
19
19
  @classmethod
20
20
  def name(cls) -> str:
21
21
  return "correlation"
22
+
23
+ @classmethod
24
+ def should_raise(cls) -> bool:
25
+ return False
@@ -53,6 +53,8 @@ class Selector(Params, Fit):
53
53
  model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
54
54
  if not isinstance(y, pd.Series):
55
55
  raise ValueError("y is not a series.")
56
+ if len(df.columns) <= 1:
57
+ return self
56
58
  n_features_to_select = max(1, int(len(df.columns) * self._feature_ratio))
57
59
  self._selector = RFE(
58
60
  self._model.estimator,
@@ -70,6 +72,8 @@ class Selector(Params, Fit):
70
72
  return self
71
73
 
72
74
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
75
+ if len(df.columns) <= 1:
76
+ return df
73
77
  selector = self._selector
74
78
  if selector is None:
75
79
  raise ValueError("selector is null.")
@@ -165,7 +165,11 @@ class Trainer(Fit):
165
165
  if y is None:
166
166
  return self
167
167
 
168
- dt_index = df.index if self._dt_column is None else df[self._dt_column]
168
+ dt_index = (
169
+ df.index
170
+ if self._dt_column is None
171
+ else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
172
+ )
169
173
 
170
174
  def _fit_column(y_series: pd.Series):
171
175
  column_dir = os.path.join(self._folder, str(y_series.name))
@@ -184,10 +188,10 @@ class Trainer(Fit):
184
188
  trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
185
189
 
186
190
  train_dt_index = dt_index[: len(x)]
187
- x_train = x[train_dt_index < split_idx]
188
- x_test = x[train_dt_index >= split_idx]
189
- y_train = y_series[train_dt_index < split_idx]
190
- y_test = y_series[train_dt_index >= split_idx]
191
+ x_train = x[train_dt_index < split_idx] # type: ignore
192
+ x_test = x[train_dt_index >= split_idx] # type: ignore
193
+ y_train = y_series[train_dt_index < split_idx] # type: ignore
194
+ y_test = y_series[train_dt_index >= split_idx] # type: ignore
191
195
 
192
196
  try:
193
197
  # Window the data
@@ -250,14 +254,15 @@ class Trainer(Fit):
250
254
  return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
251
255
  return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
252
256
  except WavetrainException as exc:
257
+ logging.warning("WE DID NOT END UP TRAINING ANYTHING!!!!!")
253
258
  logging.warning(str(exc))
254
259
  return -1.0
255
260
 
256
261
  start_validation_index = (
257
- dt_index[-int(len(dt_index) * self._validation_size) - 1]
262
+ dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
258
263
  if isinstance(self._validation_size, float)
259
264
  else dt_index[
260
- dt_index >= (dt_index.to_list()[-1] - self._validation_size)
265
+ dt_index >= (dt_index.to_list()[-1] - self._validation_size) # type: ignore
261
266
  ].to_list()[0]
262
267
  )
263
268
  test_df = df[dt_index < start_validation_index]
@@ -288,7 +293,12 @@ class Trainer(Fit):
288
293
  )
289
294
 
290
295
  train_len = len(df[dt_index < start_test_index])
291
- test_len = len(df.loc[start_test_index:start_validation_index])
296
+ test_len = len(
297
+ dt_index[
298
+ (dt_index >= start_test_index)
299
+ & (dt_index <= start_validation_index)
300
+ ]
301
+ )
292
302
 
293
303
  last_processed_dt = None
294
304
  for count, test_idx in tqdm.tqdm(
@@ -341,7 +351,11 @@ class Trainer(Fit):
341
351
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
342
352
  """Predict the expected values of the data."""
343
353
  feature_columns = df.columns.values
344
- dt_index = df.index if self._dt_column is None else df[self._dt_column]
354
+ dt_index = (
355
+ df.index
356
+ if self._dt_column is None
357
+ else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
358
+ )
345
359
 
346
360
  for column in os.listdir(self._folder):
347
361
  column_path = os.path.join(self._folder, column)
@@ -353,6 +367,8 @@ class Trainer(Fit):
353
367
  if not os.path.isdir(date_path):
354
368
  continue
355
369
  dates.append(datetime.datetime.fromisoformat(date_str))
370
+ if not dates:
371
+ raise ValueError(f"no dates found for {column}.")
356
372
  bins: list[datetime.datetime] = sorted(
357
373
  [dt_index.min().to_pydatetime()]
358
374
  + dates
@@ -371,7 +387,12 @@ class Trainer(Fit):
371
387
  column: str,
372
388
  dates: list[datetime.datetime],
373
389
  ) -> pd.DataFrame:
374
- filtered_dates = [x for x in dates if x < group.index.max()]
390
+ group_dt_index = (
391
+ group.index
392
+ if self._dt_column is None
393
+ else pd.DatetimeIndex(pd.to_datetime(group[self._dt_column]))
394
+ )
395
+ filtered_dates = [x for x in dates if x < group_dt_index.max()]
375
396
  if not filtered_dates:
376
397
  filtered_dates = [dates[-1]]
377
398
  date_str = dates[-1].isoformat()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes
@@ -27,8 +27,8 @@ class CombinedReducer(Reducer):
27
27
  self._reducers = [
28
28
  ConstantReducer(),
29
29
  DuplicateReducer(),
30
- CorrelationReducer(),
31
30
  NonNumericReducer(),
31
+ CorrelationReducer(),
32
32
  ]
33
33
 
34
34
  @classmethod