wavetrainer 0.0.50__tar.gz → 0.0.52__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. {wavetrainer-0.0.50/wavetrainer.egg-info → wavetrainer-0.0.52}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/setup.py +1 -1
  3. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/model_router.py +52 -2
  5. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/xgboost/xgboost_model.py +4 -1
  6. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/trainer.py +3 -3
  7. {wavetrainer-0.0.50 → wavetrainer-0.0.52/wavetrainer.egg-info}/PKG-INFO +1 -1
  8. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/LICENSE +0 -0
  9. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/MANIFEST.in +0 -0
  10. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/README.md +0 -0
  11. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/requirements.txt +0 -0
  12. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/setup.cfg +0 -0
  13. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/tests/__init__.py +0 -0
  14. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/tests/model/__init__.py +0 -0
  15. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/tests/model/catboost_kwargs_test.py +0 -0
  16. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/tests/trainer_test.py +0 -0
  17. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/calibrator/__init__.py +0 -0
  18. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/calibrator/calibrator.py +0 -0
  19. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/calibrator/calibrator_router.py +0 -0
  20. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/calibrator/mapie_calibrator.py +0 -0
  21. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  22. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/create.py +0 -0
  23. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/exceptions.py +0 -0
  24. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/fit.py +0 -0
  25. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/__init__.py +0 -0
  26. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/catboost/__init__.py +0 -0
  27. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  28. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  29. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/catboost/catboost_model.py +0 -0
  30. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  31. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/model.py +0 -0
  32. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/tabpfn/__init__.py +0 -0
  33. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  34. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/xgboost/__init__.py +0 -0
  35. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  36. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  37. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/model_type.py +0 -0
  38. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/params.py +0 -0
  39. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/__init__.py +0 -0
  40. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  41. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/combined_reducer.py +0 -0
  42. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/constant_reducer.py +0 -0
  43. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/correlation_reducer.py +0 -0
  44. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  45. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  46. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  47. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/pca_reducer.py +0 -0
  48. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/reducer.py +0 -0
  49. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  50. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  51. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/reducer/unseen_reducer.py +0 -0
  52. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/selector/__init__.py +0 -0
  53. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/selector/selector.py +0 -0
  54. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/__init__.py +0 -0
  55. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/class_weights.py +0 -0
  56. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/combined_weights.py +0 -0
  57. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/exponential_weights.py +0 -0
  58. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/linear_weights.py +0 -0
  59. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/noop_weights.py +0 -0
  60. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/sigmoid_weights.py +0 -0
  61. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/weights.py +0 -0
  62. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/weights/weights_router.py +0 -0
  63. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/windower/__init__.py +0 -0
  64. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer/windower/windower.py +0 -0
  65. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer.egg-info/SOURCES.txt +0 -0
  66. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer.egg-info/dependency_links.txt +0 -0
  67. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer.egg-info/not-zip-safe +0 -0
  68. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer.egg-info/requires.txt +0 -0
  69. {wavetrainer-0.0.50 → wavetrainer-0.0.52}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.50
3
+ Version: 0.0.52
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.50',
26
+ version='0.0.52',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.50"
5
+ __VERSION__ = "0.0.52"
6
6
  __all__ = ("create",)
@@ -1,19 +1,23 @@
1
1
  """A model class that routes to other models."""
2
2
 
3
+ import functools
3
4
  import json
4
5
  import os
5
6
  from typing import Self
6
7
 
7
8
  import optuna
8
9
  import pandas as pd
10
+ from sklearn.metrics import accuracy_score # type: ignore
9
11
 
12
+ from ..model_type import ModelType, determine_model_type
10
13
  from .catboost.catboost_model import CatboostModel
11
- from .model import Model
14
+ from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
12
15
  from .tabpfn.tabpfn_model import TabPFNModel
13
16
  from .xgboost.xgboost_model import XGBoostModel
14
17
 
15
18
  _MODEL_ROUTER_FILE = "model_router.json"
16
19
  _MODEL_KEY = "model"
20
+ _FALSE_POSITIVE_REDUCTION_STEPS_KEY = "false_positive_reduction_steps"
17
21
  _MODELS = {
18
22
  CatboostModel.name(): CatboostModel,
19
23
  TabPFNModel.name(): TabPFNModel,
@@ -27,10 +31,12 @@ class ModelRouter(Model):
27
31
  # pylint: disable=too-many-positional-arguments,too-many-arguments
28
32
 
29
33
  _model: Model | None
34
+ _false_positive_reduction_steps: int | None
30
35
 
31
36
  def __init__(self) -> None:
32
37
  super().__init__()
33
38
  self._model = None
39
+ self._false_positive_reduction_steps = None
34
40
 
35
41
  @classmethod
36
42
  def name(cls) -> str:
@@ -81,6 +87,9 @@ class ModelRouter(Model):
81
87
  def set_options(
82
88
  self, trial: optuna.Trial | optuna.trial.FrozenTrial, df: pd.DataFrame
83
89
  ) -> None:
90
+ self._false_positive_reduction_steps = trial.suggest_int(
91
+ _FALSE_POSITIVE_REDUCTION_STEPS_KEY, 0, 5
92
+ )
84
93
  model_name = trial.suggest_categorical(
85
94
  "model", [k for k, v in _MODELS.items() if v.supports_x(df)]
86
95
  )
@@ -122,7 +131,48 @@ class ModelRouter(Model):
122
131
  model = self._model
123
132
  if model is None:
124
133
  raise ValueError("model is null")
125
- model.fit(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
134
+ false_positive_reduction_steps = self._false_positive_reduction_steps
135
+ if false_positive_reduction_steps is None:
136
+ false_positive_reduction_steps = 0
137
+ for i in range(max(false_positive_reduction_steps, 1)):
138
+ print(f"False Positive Reduction Step: {i + 1}")
139
+ pred = model.fit_transform(df, y=y, w=w, eval_x=eval_x, eval_y=eval_y)
140
+ if (
141
+ w is None
142
+ or y is None
143
+ or determine_model_type(y) == ModelType.REGRESSION
144
+ ):
145
+ break
146
+ print(f"Accuracy: {accuracy_score(y, pred[PREDICTION_COLUMN])}")
147
+ pred["__wavetrain_correct"] = pred[PREDICTION_COLUMN] != y
148
+ pred["__wavetrain_error_weight"] = pred["__wavetrain_correct"].astype(float)
149
+ prob_columns = sorted(
150
+ [
151
+ x
152
+ for x in pred.columns.values.tolist()
153
+ if x.startswith(PROBABILITY_COLUMN_PREFIX)
154
+ ]
155
+ )
156
+ if prob_columns:
157
+
158
+ def determine_error_weight(
159
+ row: pd.Series, prob_columns: list[str]
160
+ ) -> float:
161
+ nonlocal y
162
+ if not row["__wavetrain_correct"]:
163
+ return abs(row[prob_columns[1 - int(y.loc[row.name])]]) # type: ignore
164
+ return 0.0
165
+
166
+ pred["__wavetrain_error_weight"] = pred.apply(
167
+ functools.partial(
168
+ determine_error_weight,
169
+ prob_columns=prob_columns,
170
+ ),
171
+ axis=1,
172
+ )
173
+ w += pred["__wavetrain_error_weight"]
174
+ w = w.clip(lower=0.0)
175
+
126
176
  return self
127
177
 
128
178
  def transform(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -125,7 +125,10 @@ class XGBoostModel(Model):
125
125
  try:
126
126
  score_dict = bst.get_booster().get_score(importance_type="weight") # type: ignore
127
127
  total = sum(score_dict.values()) # type: ignore
128
- return {k: v / total for k, v in score_dict.items()} # type: ignore
128
+ return {
129
+ k: 0.0 if total == 0.0 else v / total # type: ignore
130
+ for k, v in score_dict.items() # type: ignore
131
+ } # type: ignore
129
132
  except XGBoostError as exc:
130
133
  print(str(exc))
131
134
  return {}
@@ -154,7 +154,7 @@ class Trainer(Fit):
154
154
  self._dt_column = dt_column
155
155
  self._max_train_timeout = max_train_timeout
156
156
  self._cutoff_dt = cutoff_dt
157
- self._embedding_cols = embedding_cols
157
+ self.embedding_cols = embedding_cols
158
158
 
159
159
  def _provide_study(self, column: str) -> optuna.Study:
160
160
  storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -249,7 +249,7 @@ class Trainer(Fit):
249
249
 
250
250
  # Perform common reductions
251
251
  start_reducer = time.time()
252
- reducer = CombinedReducer(self._embedding_cols)
252
+ reducer = CombinedReducer(self.embedding_cols)
253
253
  reducer.set_options(trial, x)
254
254
  x_train = reducer.fit_transform(x_train, y=y_train)
255
255
  x_test = reducer.transform(x_test)
@@ -514,7 +514,7 @@ class Trainer(Fit):
514
514
  date_str = dates[-1].isoformat()
515
515
  folder = os.path.join(column_path, date_str)
516
516
 
517
- reducer = CombinedReducer(self._embedding_cols)
517
+ reducer = CombinedReducer(self.embedding_cols)
518
518
  reducer.load(folder)
519
519
 
520
520
  model = ModelRouter()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.50
3
+ Version: 0.0.52
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes