wavetrainer 0.0.24__tar.gz → 0.0.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {wavetrainer-0.0.24/wavetrainer.egg-info → wavetrainer-0.0.25}/PKG-INFO +11 -1
  2. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/README.md +5 -0
  3. wavetrainer-0.0.24/wavetrainer.egg-info/requires.txt → wavetrainer-0.0.25/requirements.txt +5 -0
  4. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/setup.py +1 -1
  5. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/tests/trainer_test.py +1 -1
  6. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/__init__.py +1 -1
  7. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/catboost_model.py +6 -0
  8. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/model.py +9 -0
  9. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/model_router.py +14 -5
  10. wavetrainer-0.0.25/wavetrainer/model/tabpfn_model.py +145 -0
  11. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/combined_reducer.py +4 -0
  12. wavetrainer-0.0.25/wavetrainer/reducer/pca_reducer.py +60 -0
  13. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/selector/selector.py +6 -3
  14. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/trainer.py +2 -1
  15. {wavetrainer-0.0.24 → wavetrainer-0.0.25/wavetrainer.egg-info}/PKG-INFO +11 -1
  16. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer.egg-info/SOURCES.txt +2 -0
  17. wavetrainer-0.0.24/requirements.txt → wavetrainer-0.0.25/wavetrainer.egg-info/requires.txt +6 -1
  18. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/LICENSE +0 -0
  19. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/MANIFEST.in +0 -0
  20. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/setup.cfg +0 -0
  21. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/tests/__init__.py +0 -0
  22. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/tests/model/__init__.py +0 -0
  23. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/tests/model/catboost_kwargs_test.py +0 -0
  24. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/calibrator/__init__.py +0 -0
  25. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/calibrator/calibrator.py +0 -0
  26. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/calibrator/calibrator_router.py +0 -0
  27. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  28. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  29. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/create.py +0 -0
  30. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/exceptions.py +0 -0
  31. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/fit.py +0 -0
  32. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/__init__.py +0 -0
  33. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/catboost_classifier_wrap.py +0 -0
  34. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/catboost_kwargs.py +0 -0
  35. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model/catboost_regressor_wrap.py +0 -0
  36. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/model_type.py +0 -0
  37. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/params.py +0 -0
  38. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/__init__.py +0 -0
  39. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  40. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/constant_reducer.py +0 -0
  41. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/correlation_reducer.py +0 -0
  42. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  43. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  44. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/reducer.py +0 -0
  45. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/reducer/unseen_reducer.py +0 -0
  46. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/selector/__init__.py +0 -0
  47. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/__init__.py +0 -0
  48. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/class_weights.py +0 -0
  49. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/combined_weights.py +0 -0
  50. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/exponential_weights.py +0 -0
  51. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/linear_weights.py +0 -0
  52. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/noop_weights.py +0 -0
  53. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/sigmoid_weights.py +0 -0
  54. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/weights.py +0 -0
  55. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/weights/weights_router.py +0 -0
  56. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/windower/__init__.py +0 -0
  57. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer/windower/windower.py +0 -0
  58. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer.egg-info/dependency_links.txt +0 -0
  59. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer.egg-info/not-zip-safe +0 -0
  60. {wavetrainer-0.0.24 → wavetrainer-0.0.25}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.24
3
+ Version: 0.0.25
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,6 +23,11 @@ Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
24
  Requires-Dist: pytz>=2025.1
25
25
  Requires-Dist: torch>=2.6.0
26
+ Requires-Dist: tabpfn>=2.0.6
27
+ Requires-Dist: tabpfn-extensions>=0.0.4
28
+ Requires-Dist: shap>=0.47.2
29
+ Requires-Dist: hyperopt>=0.2.7
30
+ Requires-Dist: pytest-is-running>=1.5.1
26
31
 
27
32
  # wavetrainer
28
33
 
@@ -52,6 +57,11 @@ Python 3.11.6:
52
57
  - [mapie](https://mapie.readthedocs.io/en/stable/)
53
58
  - [pytz](https://pythonhosted.org/pytz/)
54
59
  - [torch](https://pytorch.org/)
60
+ - [tabpfn](https://github.com/PriorLabs/TabPFN)
61
+ - [tabpfn-extensions](https://github.com/PriorLabs/tabpfn-extensions)
62
+ - [shap](https://shap.readthedocs.io/en/latest/)
63
+ - [hyperopt](https://hyperopt.github.io/hyperopt/)
64
+ - [pytest-is-running](https://github.com/adamchainz/pytest-is-running)
55
65
 
56
66
  ## Raison D'être :thought_balloon:
57
67
 
@@ -26,6 +26,11 @@ Python 3.11.6:
26
26
  - [mapie](https://mapie.readthedocs.io/en/stable/)
27
27
  - [pytz](https://pythonhosted.org/pytz/)
28
28
  - [torch](https://pytorch.org/)
29
+ - [tabpfn](https://github.com/PriorLabs/TabPFN)
30
+ - [tabpfn-extensions](https://github.com/PriorLabs/tabpfn-extensions)
31
+ - [shap](https://shap.readthedocs.io/en/latest/)
32
+ - [hyperopt](https://hyperopt.github.io/hyperopt/)
33
+ - [pytest-is-running](https://github.com/adamchainz/pytest-is-running)
29
34
 
30
35
  ## Raison D'être :thought_balloon:
31
36
 
@@ -10,3 +10,8 @@ venn-abers>=1.4.6
10
10
  mapie>=0.9.2
11
11
  pytz>=2025.1
12
12
  torch>=2.6.0
13
+ tabpfn>=2.0.6
14
+ tabpfn-extensions>=0.0.4
15
+ shap>=0.47.2
16
+ hyperopt>=0.2.7
17
+ pytest-is-running>=1.5.1
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.24',
26
+ version='0.0.25',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -14,7 +14,7 @@ class TestTrainer(unittest.TestCase):
14
14
  def test_trainer(self):
15
15
  with tempfile.TemporaryDirectory() as tmpdir:
16
16
  trainer = Trainer(tmpdir, walkforward_timedelta=datetime.timedelta(days=7), trials=1)
17
- x_data = [i for i in range(100)]
17
+ x_data = [i for i in range(101)]
18
18
  x_index = [datetime.datetime(2022, 1, 1) + datetime.timedelta(days=i) for i in range(len(x_data))]
19
19
  df = pd.DataFrame(
20
20
  data={
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.24"
5
+ __VERSION__ = "0.0.25"
6
6
  __all__ = ("create",)
@@ -61,12 +61,17 @@ class CatboostModel(Model):
61
61
  def estimator(self) -> Any:
62
62
  return self._provide_catboost()
63
63
 
64
+ @property
65
+ def supports_importances(self) -> bool:
66
+ return True
67
+
64
68
  def pre_fit(
65
69
  self,
66
70
  df: pd.DataFrame,
67
71
  y: pd.Series | pd.DataFrame | None,
68
72
  eval_x: pd.DataFrame | None = None,
69
73
  eval_y: pd.Series | pd.DataFrame | None = None,
74
+ w: pd.Series | None = None,
70
75
  ):
71
76
  if y is None:
72
77
  raise ValueError("y is null.")
@@ -75,6 +80,7 @@ class CatboostModel(Model):
75
80
  EVAL_SET_ARG_KEY: (eval_x, eval_y),
76
81
  CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
77
82
  ORIGINAL_X_ARG_KEY: df,
83
+ "sample_weight": w,
78
84
  }
79
85
 
80
86
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
@@ -1,5 +1,6 @@
1
1
  """The prototype model class."""
2
2
 
3
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
3
4
  from typing import Any
4
5
 
5
6
  import pandas as pd
@@ -24,12 +25,20 @@ class Model(Params, Fit):
24
25
  """The estimator backing the model."""
25
26
  raise NotImplementedError("estimator not implemented in parent class.")
26
27
 
28
+ @property
29
+ def supports_importances(self) -> bool:
30
+ """Whether this model supports feature importances."""
31
+ raise NotImplementedError(
32
+ "supports_importances not implemented in parent class."
33
+ )
34
+
27
35
  def pre_fit(
28
36
  self,
29
37
  df: pd.DataFrame,
30
38
  y: pd.Series | pd.DataFrame | None,
31
39
  eval_x: pd.DataFrame | None = None,
32
40
  eval_y: pd.Series | pd.DataFrame | None = None,
41
+ w: pd.Series | None = None,
33
42
  ) -> dict[str, Any]:
34
43
  """A call to make sure the model is prepared for the target type."""
35
44
  raise NotImplementedError("pre_fit not implemented in parent class.")
@@ -9,11 +9,13 @@ import pandas as pd
9
9
 
10
10
  from .catboost_model import CatboostModel
11
11
  from .model import Model
12
+ from .tabpfn_model import TabPFNModel
12
13
 
13
14
  _MODEL_ROUTER_FILE = "model_router.json"
14
15
  _MODEL_KEY = "model"
15
16
  _MODELS = {
16
17
  CatboostModel.name(): CatboostModel,
18
+ TabPFNModel.name(): TabPFNModel,
17
19
  }
18
20
 
19
21
 
@@ -39,23 +41,30 @@ class ModelRouter(Model):
39
41
  raise ValueError("model is null")
40
42
  return model.estimator
41
43
 
44
+ @property
45
+ def supports_importances(self) -> bool:
46
+ model = self._model
47
+ if model is None:
48
+ raise ValueError("model is null")
49
+ return model.supports_importances
50
+
42
51
  def pre_fit(
43
52
  self,
44
53
  df: pd.DataFrame,
45
54
  y: pd.Series | pd.DataFrame | None,
46
55
  eval_x: pd.DataFrame | None = None,
47
56
  eval_y: pd.Series | pd.DataFrame | None = None,
57
+ w: pd.Series | None = None,
48
58
  ) -> dict[str, Any]:
49
59
  model = self._model
50
60
  if model is None:
51
61
  raise ValueError("model is null")
52
- return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
62
+ return model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
53
63
 
54
64
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
55
- self._model = _MODELS[
56
- trial.suggest_categorical("model", list(_MODELS.keys()))
57
- ]()
58
- self._model.set_options(trial)
65
+ model = _MODELS[trial.suggest_categorical("model", list(_MODELS.keys()))]()
66
+ model.set_options(trial)
67
+ self._model = model
59
68
 
60
69
  def load(self, folder: str) -> None:
61
70
  with open(os.path.join(folder, _MODEL_ROUTER_FILE), encoding="utf8") as handle:
@@ -0,0 +1,145 @@
1
+ """A model that wraps tabpfn."""
2
+ # pylint: disable=duplicate-code,too-many-arguments,too-many-positional-arguments
3
+
4
+ import json
5
+ import os
6
+ import pickle
7
+ from typing import Any, Self
8
+
9
+ import optuna
10
+ import pandas as pd
11
+ import pytest_is_running
12
+ import torch
13
+ from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import ( # type: ignore
14
+ AutoTabPFNClassifier, AutoTabPFNRegressor)
15
+
16
+ from ..model_type import ModelType, determine_model_type
17
+ from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
18
+
19
+ _MODEL_FILENAME = "model.pkl"
20
+ _MODEL_PARAMS_FILENAME = "model_params.json"
21
+ _MODEL_TYPE_KEY = "model_type"
22
+
23
+
24
+ class TabPFNModel(Model):
25
+ """A class that uses TabPFN as a model."""
26
+
27
+ _tabpfn: AutoTabPFNClassifier | AutoTabPFNRegressor | None
28
+ _model_type: None | ModelType
29
+
30
+ @classmethod
31
+ def name(cls) -> str:
32
+ return "tabpfn"
33
+
34
+ def __init__(self) -> None:
35
+ super().__init__()
36
+ self._tabpfn = None
37
+ self._model_type = None
38
+
39
+ @property
40
+ def estimator(self) -> Any:
41
+ return self._provide_tabpfn()
42
+
43
+ @property
44
+ def supports_importances(self) -> bool:
45
+ return False
46
+
47
+ def pre_fit(
48
+ self,
49
+ df: pd.DataFrame,
50
+ y: pd.Series | pd.DataFrame | None,
51
+ eval_x: pd.DataFrame | None = None,
52
+ eval_y: pd.Series | pd.DataFrame | None = None,
53
+ w: pd.Series | None = None,
54
+ ):
55
+ if y is None:
56
+ raise ValueError("y is null.")
57
+ self._model_type = determine_model_type(y)
58
+ return {}
59
+
60
+ def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
61
+ pass
62
+
63
+ def load(self, folder: str) -> None:
64
+ with open(os.path.join(folder, _MODEL_FILENAME), "rb") as f:
65
+ self._tabpfn = pickle.load(f)
66
+ with open(
67
+ os.path.join(folder, _MODEL_PARAMS_FILENAME), encoding="utf8"
68
+ ) as handle:
69
+ params = json.load(handle)
70
+ self._model_type = ModelType(params[_MODEL_TYPE_KEY])
71
+
72
+ def save(self, folder: str) -> None:
73
+ with open(os.path.join(folder, _MODEL_FILENAME), "wb") as f:
74
+ pickle.dump(self._tabpfn, f)
75
+ with open(
76
+ os.path.join(folder, _MODEL_PARAMS_FILENAME), "w", encoding="utf8"
77
+ ) as handle:
78
+ json.dump(
79
+ {
80
+ _MODEL_TYPE_KEY: str(self._model_type),
81
+ },
82
+ handle,
83
+ )
84
+
85
+ def fit(
86
+ self,
87
+ df: pd.DataFrame,
88
+ y: pd.Series | pd.DataFrame | None = None,
89
+ w: pd.Series | None = None,
90
+ eval_x: pd.DataFrame | None = None,
91
+ eval_y: pd.Series | pd.DataFrame | None = None,
92
+ ) -> Self:
93
+ if y is None:
94
+ raise ValueError("y is null.")
95
+ self._model_type = determine_model_type(y)
96
+ tabpfn = self._provide_tabpfn()
97
+ tabpfn.fit(df, y)
98
+ return self
99
+
100
+ def transform(self, df: pd.DataFrame) -> pd.DataFrame:
101
+ tabpfn = self._provide_tabpfn()
102
+ if tabpfn is None:
103
+ raise ValueError("tabpfn is null")
104
+ pred = tabpfn.predict(df)
105
+ new_df = pd.DataFrame(
106
+ index=df.index,
107
+ data={
108
+ PREDICTION_COLUMN: pred.flatten(),
109
+ },
110
+ )
111
+ if isinstance(tabpfn, AutoTabPFNClassifier):
112
+ proba = tabpfn.predict_proba(df)
113
+ for i in range(proba.shape[1]):
114
+ new_df[f"{PROBABILITY_COLUMN_PREFIX}{i}"] = proba[:, i]
115
+ return new_df
116
+
117
+ def _provide_tabpfn(self) -> AutoTabPFNClassifier | AutoTabPFNRegressor:
118
+ tabpfn = self._tabpfn
119
+ if tabpfn is None:
120
+ max_time = 1 if pytest_is_running.is_running() else 120
121
+ match self._model_type:
122
+ case ModelType.BINARY:
123
+ tabpfn = AutoTabPFNClassifier(
124
+ max_time=max_time,
125
+ device="cuda" if torch.cuda.is_available() else "cpu",
126
+ )
127
+ case ModelType.REGRESSION:
128
+ tabpfn = AutoTabPFNRegressor(
129
+ max_time=max_time,
130
+ device="cuda" if torch.cuda.is_available() else "cpu",
131
+ )
132
+ case ModelType.BINNED_BINARY:
133
+ tabpfn = AutoTabPFNClassifier(
134
+ max_time=max_time,
135
+ device="cuda" if torch.cuda.is_available() else "cpu",
136
+ )
137
+ case ModelType.MULTI_CLASSIFICATION:
138
+ tabpfn = AutoTabPFNClassifier(
139
+ max_time=max_time,
140
+ device="cuda" if torch.cuda.is_available() else "cpu",
141
+ )
142
+ self._tabpfn = tabpfn
143
+ if tabpfn is None:
144
+ raise ValueError("tabpfn is null")
145
+ return tabpfn
@@ -12,6 +12,7 @@ from .constant_reducer import ConstantReducer
12
12
  from .correlation_reducer import CorrelationReducer
13
13
  from .duplicate_reducer import DuplicateReducer
14
14
  from .nonnumeric_reducer import NonNumericReducer
15
+ from .pca_reducer import PCAReducer
15
16
  from .reducer import Reducer
16
17
  from .unseen_reducer import UnseenReducer
17
18
 
@@ -32,6 +33,7 @@ class CombinedReducer(Reducer):
32
33
  ConstantReducer(),
33
34
  DuplicateReducer(),
34
35
  CorrelationReducer(),
36
+ PCAReducer(),
35
37
  ]
36
38
 
37
39
  @classmethod
@@ -59,6 +61,8 @@ class CombinedReducer(Reducer):
59
61
  self._reducers.append(NonNumericReducer())
60
62
  elif reducer_name == UnseenReducer.name():
61
63
  self._reducers.append(UnseenReducer())
64
+ elif reducer_name == PCAReducer.name():
65
+ self._reducers.append(PCAReducer())
62
66
  for reducer in self._reducers:
63
67
  reducer.load(folder)
64
68
 
@@ -0,0 +1,60 @@
1
"""A reducer that projects wide feature sets onto principal components."""

import os
from typing import Self

import joblib  # type: ignore
import optuna
import pandas as pd
from sklearn.decomposition import PCA  # type: ignore
from sklearn.preprocessing import StandardScaler  # type: ignore

from .reducer import Reducer

_PCA_FILE = "pca.joblib"
_PCA_SCALER_FILE = "pca_scaler.joblib"


class PCAReducer(Reducer):
    """Reduces a wide dataframe to its principal components.

    Standardizes the features, then applies PCA. Only engages when the
    frame has at least ``n_components`` columns; narrower frames pass
    through untouched.
    """

    # pylint: disable=too-many-positional-arguments,too-many-arguments

    def __init__(self):
        super().__init__()
        self._scaler = StandardScaler()
        self._pca = PCA(n_components=300)

    @classmethod
    def name(cls) -> str:
        return "pca"

    def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
        # The component count is fixed; nothing to tune per trial.
        pass

    def load(self, folder: str) -> None:
        """Restore the fitted scaler and PCA from ``folder``."""
        self._scaler = joblib.load(os.path.join(folder, _PCA_SCALER_FILE))
        self._pca = joblib.load(os.path.join(folder, _PCA_FILE))

    def save(self, folder: str) -> None:
        """Persist the fitted scaler and PCA to ``folder``."""
        joblib.dump(self._scaler, os.path.join(folder, _PCA_SCALER_FILE))
        joblib.dump(self._pca, os.path.join(folder, _PCA_FILE))

    def fit(
        self,
        df: pd.DataFrame,
        y: pd.Series | pd.DataFrame | None = None,
        w: pd.Series | None = None,
        eval_x: pd.DataFrame | None = None,
        eval_y: pd.Series | pd.DataFrame | None = None,
    ) -> Self:
        """Fit the scaler and PCA, unless the frame is too narrow to reduce."""
        if len(df.columns.values) < self._pca.n_components:  # type: ignore
            return self
        x_scaled = self._scaler.fit_transform(df)
        self._pca.fit(x_scaled)
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Project ``df`` onto the fitted principal components.

        Returns ``df`` unchanged when it is too narrow (matching the
        guard used in :meth:`fit`).
        """
        if len(df.columns.values) < self._pca.n_components:  # type: ignore
            return df
        # Apply the same standardization the PCA was fitted on; feeding
        # raw features to a PCA fitted on scaled data skews the projection.
        x_scaled = self._scaler.transform(df)
        components = self._pca.transform(x_scaled)
        # Wrap the ndarray so downstream steps keep the row index and get
        # named columns, honouring the declared DataFrame return type.
        return pd.DataFrame(
            components,
            index=df.index,
            columns=[f"pca_{i}" for i in range(components.shape[1])],
        )
@@ -49,8 +49,10 @@ class Selector(Params, Fit):
49
49
  eval_x: pd.DataFrame | None = None,
50
50
  eval_y: pd.Series | pd.DataFrame | None = None,
51
51
  ) -> Self:
52
+ if not self._model.supports_importances:
53
+ return self
52
54
  sklearn.set_config(enable_metadata_routing=False)
53
- model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y)
55
+ model_kwargs = self._model.pre_fit(df, y=y, eval_x=eval_x, eval_y=eval_y, w=w)
54
56
  if not isinstance(y, pd.Series):
55
57
  raise ValueError("y is not a series.")
56
58
  if len(df.columns) <= 1:
@@ -65,7 +67,7 @@ class Selector(Params, Fit):
65
67
  ),
66
68
  )
67
69
  try:
68
- self._selector.fit(df, y=y, sample_weight=w, **model_kwargs)
70
+ self._selector.fit(df, y=y, **model_kwargs)
69
71
  except ValueError as exc:
70
72
  # Catch issues with 1 feature as a reduction target.
71
73
  logging.warning(str(exc))
@@ -76,7 +78,8 @@ class Selector(Params, Fit):
76
78
  return df
77
79
  selector = self._selector
78
80
  if selector is None:
79
- raise ValueError("selector is null.")
81
+ logging.warning("selector is null")
82
+ return df
80
83
  try:
81
84
  return df[selector.get_feature_names_out()]
82
85
  except AttributeError as exc:
@@ -266,7 +266,6 @@ class Trainer(Fit):
266
266
  return float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
267
267
  return float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
268
268
  except WavetrainException as exc:
269
- logging.warning("WE DID NOT END UP TRAINING ANYTHING!!!!!")
270
269
  logging.warning(str(exc))
271
270
  return -1.0
272
271
 
@@ -343,6 +342,8 @@ class Trainer(Fit):
343
342
 
344
343
  test_df = df.iloc[: train_len + count + test_len]
345
344
  test_series = y_series.iloc[: train_len + count + test_len]
345
+ if len(test_df) <= 2:
346
+ continue
346
347
 
347
348
  if test_idx < start_validation_index:
348
349
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.24
3
+ Version: 0.0.25
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,6 +23,11 @@ Requires-Dist: venn-abers>=1.4.6
23
23
  Requires-Dist: mapie>=0.9.2
24
24
  Requires-Dist: pytz>=2025.1
25
25
  Requires-Dist: torch>=2.6.0
26
+ Requires-Dist: tabpfn>=2.0.6
27
+ Requires-Dist: tabpfn-extensions>=0.0.4
28
+ Requires-Dist: shap>=0.47.2
29
+ Requires-Dist: hyperopt>=0.2.7
30
+ Requires-Dist: pytest-is-running>=1.5.1
26
31
 
27
32
  # wavetrainer
28
33
 
@@ -52,6 +57,11 @@ Python 3.11.6:
52
57
  - [mapie](https://mapie.readthedocs.io/en/stable/)
53
58
  - [pytz](https://pythonhosted.org/pytz/)
54
59
  - [torch](https://pytorch.org/)
60
+ - [tabpfn](https://github.com/PriorLabs/TabPFN)
61
+ - [tabpfn-extensions](https://github.com/PriorLabs/tabpfn-extensions)
62
+ - [shap](https://shap.readthedocs.io/en/latest/)
63
+ - [hyperopt](https://hyperopt.github.io/hyperopt/)
64
+ - [pytest-is-running](https://github.com/adamchainz/pytest-is-running)
55
65
 
56
66
  ## Raison D'être :thought_balloon:
57
67
 
@@ -32,6 +32,7 @@ wavetrainer/model/catboost_model.py
32
32
  wavetrainer/model/catboost_regressor_wrap.py
33
33
  wavetrainer/model/model.py
34
34
  wavetrainer/model/model_router.py
35
+ wavetrainer/model/tabpfn_model.py
35
36
  wavetrainer/reducer/__init__.py
36
37
  wavetrainer/reducer/base_selector_reducer.py
37
38
  wavetrainer/reducer/combined_reducer.py
@@ -39,6 +40,7 @@ wavetrainer/reducer/constant_reducer.py
39
40
  wavetrainer/reducer/correlation_reducer.py
40
41
  wavetrainer/reducer/duplicate_reducer.py
41
42
  wavetrainer/reducer/nonnumeric_reducer.py
43
+ wavetrainer/reducer/pca_reducer.py
42
44
  wavetrainer/reducer/reducer.py
43
45
  wavetrainer/reducer/unseen_reducer.py
44
46
  wavetrainer/selector/__init__.py
@@ -9,4 +9,9 @@ catboost>=1.2.7
9
9
  venn-abers>=1.4.6
10
10
  mapie>=0.9.2
11
11
  pytz>=2025.1
12
- torch>=2.6.0
12
+ torch>=2.6.0
13
+ tabpfn>=2.0.6
14
+ tabpfn-extensions>=0.0.4
15
+ shap>=0.47.2
16
+ hyperopt>=0.2.7
17
+ pytest-is-running>=1.5.1
File without changes
File without changes
File without changes