wavetrainer 0.0.25__tar.gz → 0.0.26__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {wavetrainer-0.0.25/wavetrainer.egg-info → wavetrainer-0.0.26}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/setup.py +1 -1
  3. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/create.py +2 -0
  5. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/combined_reducer.py +4 -3
  6. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/pca_reducer.py +27 -10
  7. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/trainer.py +7 -2
  8. {wavetrainer-0.0.25 → wavetrainer-0.0.26/wavetrainer.egg-info}/PKG-INFO +1 -1
  9. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/LICENSE +0 -0
  10. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/MANIFEST.in +0 -0
  11. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/README.md +0 -0
  12. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/requirements.txt +0 -0
  13. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/setup.cfg +0 -0
  14. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/__init__.py +0 -0
  15. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/model/__init__.py +0 -0
  16. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/model/catboost_kwargs_test.py +0 -0
  17. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/tests/trainer_test.py +0 -0
  18. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/__init__.py +0 -0
  19. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/calibrator.py +0 -0
  20. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/calibrator_router.py +0 -0
  21. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  22. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  23. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/exceptions.py +0 -0
  24. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/fit.py +0 -0
  25. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/__init__.py +0 -0
  26. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  27. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_kwargs.py +0 -0
  28. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_model.py +0 -0
  29. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/model.py +0 -0
  31. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/model_router.py +0 -0
  32. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model/tabpfn_model.py +0 -0
  33. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/model_type.py +0 -0
  34. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/params.py +0 -0
  35. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/__init__.py +0 -0
  36. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  37. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/constant_reducer.py +0 -0
  38. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/correlation_reducer.py +0 -0
  39. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  40. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  41. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/reducer.py +0 -0
  42. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/reducer/unseen_reducer.py +0 -0
  43. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/selector/__init__.py +0 -0
  44. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/selector/selector.py +0 -0
  45. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/__init__.py +0 -0
  46. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/class_weights.py +0 -0
  47. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/combined_weights.py +0 -0
  48. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/exponential_weights.py +0 -0
  49. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/linear_weights.py +0 -0
  50. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/noop_weights.py +0 -0
  51. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/sigmoid_weights.py +0 -0
  52. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/weights.py +0 -0
  53. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/weights/weights_router.py +0 -0
  54. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/windower/__init__.py +0 -0
  55. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer/windower/windower.py +0 -0
  56. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/SOURCES.txt +0 -0
  57. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/dependency_links.txt +0 -0
  58. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/not-zip-safe +0 -0
  59. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/requires.txt +0 -0
  60. {wavetrainer-0.0.25 → wavetrainer-0.0.26}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.25
3
+ Version: 0.0.26
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.25',
26
+ version='0.0.26',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.25"
5
+ __VERSION__ = "0.0.26"
6
6
  __all__ = ("create",)
@@ -15,6 +15,7 @@ def create(
15
15
  dt_column: str | None = None,
16
16
  max_train_timeout: datetime.timedelta | None = None,
17
17
  cutoff_dt: datetime.datetime | None = None,
18
+ max_features: int | None = None,
18
19
  ) -> Trainer:
19
20
  """Create a trainer."""
20
21
  return Trainer(
@@ -25,4 +26,5 @@ def create(
25
26
  dt_column=dt_column,
26
27
  max_train_timeout=max_train_timeout,
27
28
  cutoff_dt=cutoff_dt,
29
+ max_features=max_features,
28
30
  )
@@ -25,15 +25,16 @@ class CombinedReducer(Reducer):
25
25
 
26
26
  # pylint: disable=too-many-positional-arguments,too-many-arguments
27
27
 
28
- def __init__(self):
28
+ def __init__(self, max_features: int | None):
29
29
  super().__init__()
30
+ self._max_features = max_features
30
31
  self._reducers = [
31
32
  UnseenReducer(),
32
33
  NonNumericReducer(),
33
34
  ConstantReducer(),
34
35
  DuplicateReducer(),
35
36
  CorrelationReducer(),
36
- PCAReducer(),
37
+ PCAReducer(max_features),
37
38
  ]
38
39
 
39
40
  @classmethod
@@ -62,7 +63,7 @@ class CombinedReducer(Reducer):
62
63
  elif reducer_name == UnseenReducer.name():
63
64
  self._reducers.append(UnseenReducer())
64
65
  elif reducer_name == PCAReducer.name():
65
- self._reducers.append(PCAReducer())
66
+ self._reducers.append(PCAReducer(self._max_features))
66
67
  for reducer in self._reducers:
67
68
  reducer.load(folder)
68
69
 
@@ -20,10 +20,15 @@ class PCAReducer(Reducer):
20
20
 
21
21
  # pylint: disable=too-many-positional-arguments,too-many-arguments
22
22
 
23
- def __init__(self):
23
+ def __init__(self, max_features: int | None):
24
24
  super().__init__()
25
- self._scaler = StandardScaler()
26
- self._pca = PCA(n_components=300)
25
+ self._max_features = max_features
26
+ if max_features is not None:
27
+ self._scaler = StandardScaler()
28
+ self._pca = PCA(n_components=max_features)
29
+ else:
30
+ self._scaler = None
31
+ self._pca = None
27
32
 
28
33
  @classmethod
29
34
  def name(cls) -> str:
@@ -33,12 +38,18 @@ class PCAReducer(Reducer):
33
38
  pass
34
39
 
35
40
  def load(self, folder: str) -> None:
36
- self._scaler = joblib.load(os.path.join(folder, _PCA_SCALER_FILE))
37
- self._pca = joblib.load(os.path.join(folder, _PCA_FILE))
41
+ pca_scaler_file = os.path.join(folder, _PCA_SCALER_FILE)
42
+ pca_file = os.path.join(folder, _PCA_FILE)
43
+ if os.path.exists(pca_scaler_file):
44
+ self._scaler = joblib.load(pca_scaler_file)
45
+ if os.path.exists(pca_file):
46
+ self._pca = joblib.load(pca_file)
38
47
 
39
48
  def save(self, folder: str) -> None:
40
- joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
41
- joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
49
+ if self._scaler is not None:
50
+ joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
51
+ if self._pca is not None:
52
+ joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))
42
53
 
43
54
  def fit(
44
55
  self,
@@ -48,13 +59,19 @@ class PCAReducer(Reducer):
48
59
  eval_x: pd.DataFrame | None = None,
49
60
  eval_y: pd.Series | pd.DataFrame | None = None,
50
61
  ) -> Self:
51
- if len(df.columns.values) < self._pca.n_components: # type: ignore
62
+ pca = self._pca
63
+ scaler = self._scaler
64
+ if pca is None or scaler is None:
65
+ return self
66
+ if len(df.columns.values) < pca.n_components: # type: ignore
52
67
  return self
53
- x_scaled = self._scaler.fit_transform(df)
54
- self._pca.fit(x_scaled)
68
+ x_scaled = scaler.fit_transform(df)
69
+ pca.fit(x_scaled)
55
70
  return self
56
71
 
57
72
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
73
+ if self._pca is None:
74
+ return df
58
75
  if len(df.columns.values) < self._pca.n_components: # type: ignore
59
76
  return df
60
77
  return self._pca.transform(df)
@@ -36,6 +36,7 @@ _TEST_SIZE_KEY = "test_size"
36
36
  _VALIDATION_SIZE_KEY = "validation_size"
37
37
  _IDX_USR_ATTR_KEY = "idx"
38
38
  _DT_COLUMN_KEY = "dt_column"
39
+ _MAX_FEATURES_KEY = "max_features"
39
40
 
40
41
 
41
42
  class Trainer(Fit):
@@ -53,6 +54,7 @@ class Trainer(Fit):
53
54
  dt_column: str | None = None,
54
55
  max_train_timeout: datetime.timedelta | None = None,
55
56
  cutoff_dt: datetime.datetime | None = None,
57
+ max_features: int | None = None,
56
58
  ):
57
59
  tqdm.tqdm.pandas()
58
60
 
@@ -103,6 +105,7 @@ class Trainer(Fit):
103
105
  )
104
106
  if dt_column is None:
105
107
  dt_column = params[_DT_COLUMN_KEY]
108
+ max_features = params.get(_MAX_FEATURES_KEY)
106
109
  else:
107
110
  with open(params_file, "w", encoding="utf8") as handle:
108
111
  validation_size_value = None
@@ -133,6 +136,7 @@ class Trainer(Fit):
133
136
  _TEST_SIZE_KEY: test_size_value,
134
137
  _VALIDATION_SIZE_KEY: validation_size_value,
135
138
  _DT_COLUMN_KEY: dt_column,
139
+ _MAX_FEATURES_KEY: max_features,
136
140
  },
137
141
  handle,
138
142
  )
@@ -143,6 +147,7 @@ class Trainer(Fit):
143
147
  self._dt_column = dt_column
144
148
  self._max_train_timeout = max_train_timeout
145
149
  self._cutoff_dt = cutoff_dt
150
+ self._max_features = max_features
146
151
 
147
152
  def _provide_study(self, column: str) -> optuna.Study:
148
153
  storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -216,7 +221,7 @@ class Trainer(Fit):
216
221
  return -1.0
217
222
 
218
223
  # Perform common reductions
219
- reducer = CombinedReducer()
224
+ reducer = CombinedReducer(self._max_features)
220
225
  reducer.set_options(trial)
221
226
  x_train = reducer.fit_transform(x_train)
222
227
  x_test = reducer.transform(x_test)
@@ -425,7 +430,7 @@ class Trainer(Fit):
425
430
  date_str = dates[-1].isoformat()
426
431
  folder = os.path.join(column_path, date_str)
427
432
 
428
- reducer = CombinedReducer()
433
+ reducer = CombinedReducer(self._max_features)
429
434
  reducer.load(folder)
430
435
 
431
436
  model = ModelRouter()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.25
3
+ Version: 0.0.26
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes