wavetrainer 0.1.7.tar.gz → 0.1.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {wavetrainer-0.1.7/wavetrainer.egg-info → wavetrainer-0.1.9}/PKG-INFO +3 -1
  2. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/README.md +1 -0
  3. wavetrainer-0.1.7/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.9/requirements.txt +1 -0
  4. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/setup.py +1 -1
  5. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/__init__.py +1 -1
  6. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/calibrator_router.py +32 -1
  7. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/create.py +2 -0
  8. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/model_router.py +10 -2
  9. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/trainer.py +5 -3
  10. {wavetrainer-0.1.7 → wavetrainer-0.1.9/wavetrainer.egg-info}/PKG-INFO +3 -1
  11. wavetrainer-0.1.7/requirements.txt → wavetrainer-0.1.9/wavetrainer.egg-info/requires.txt +2 -1
  12. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/LICENSE +0 -0
  13. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/MANIFEST.in +0 -0
  14. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/setup.cfg +0 -0
  15. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/__init__.py +0 -0
  16. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/model/__init__.py +0 -0
  17. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/model/catboost_kwargs_test.py +0 -0
  18. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/tests/trainer_test.py +0 -0
  19. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/__init__.py +0 -0
  20. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/calibrator.py +0 -0
  21. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  22. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/exceptions.py +0 -0
  23. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/fit.py +0 -0
  24. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/__init__.py +0 -0
  25. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/__init__.py +0 -0
  26. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  27. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  28. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_model.py +0 -0
  29. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/model.py +0 -0
  31. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/tabpfn/__init__.py +0 -0
  32. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  33. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/__init__.py +0 -0
  34. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  35. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  36. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
  37. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model_type.py +0 -0
  38. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/params.py +0 -0
  39. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/__init__.py +0 -0
  40. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  41. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/combined_reducer.py +0 -0
  42. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/constant_reducer.py +0 -0
  43. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/correlation_reducer.py +0 -0
  44. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  45. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  46. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  47. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/pca_reducer.py +0 -0
  48. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/reducer.py +0 -0
  49. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  50. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  51. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/reducer/unseen_reducer.py +0 -0
  52. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/selector/__init__.py +0 -0
  53. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/selector/selector.py +0 -0
  54. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/__init__.py +0 -0
  55. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/class_weights.py +0 -0
  56. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/combined_weights.py +0 -0
  57. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/exponential_weights.py +0 -0
  58. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/linear_weights.py +0 -0
  59. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/noop_weights.py +0 -0
  60. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/sigmoid_weights.py +0 -0
  61. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/weights.py +0 -0
  62. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/weights/weights_router.py +0 -0
  63. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/windower/__init__.py +0 -0
  64. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/windower/windower.py +0 -0
  65. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/SOURCES.txt +0 -0
  66. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/dependency_links.txt +0 -0
  67. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/not-zip-safe +0 -0
  68. {wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer.egg-info/top_level.txt +0 -0
{wavetrainer-0.1.7/wavetrainer.egg-info → wavetrainer-0.1.9}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.7
+Version: 0.1.9
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
@@ -28,6 +28,7 @@ Requires-Dist: xgboost>=3.0.0
 Requires-Dist: jax>=0.6.1
 Requires-Dist: tabpfn_extensions>=0.0.4
 Requires-Dist: hyperopt>=0.2.7
+Requires-Dist: pycaleva>=0.8.2

 # wavetrainer

@@ -62,6 +63,7 @@ Python 3.11.6:
 - [jax](https://github.com/jax-ml/jax)
 - [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
 - [hyperopt](https://github.com/hyperopt/hyperopt)
+- [pycaleva](https://github.com/MartinWeigl/pycaleva)

 ## Raison D'être :thought_balloon:

{wavetrainer-0.1.7 → wavetrainer-0.1.9}/README.md

@@ -31,6 +31,7 @@ Python 3.11.6:
 - [jax](https://github.com/jax-ml/jax)
 - [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
 - [hyperopt](https://github.com/hyperopt/hyperopt)
+- [pycaleva](https://github.com/MartinWeigl/pycaleva)

 ## Raison D'être :thought_balloon:

wavetrainer-0.1.7/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.9/requirements.txt

@@ -15,3 +15,4 @@ xgboost>=3.0.0
 jax>=0.6.1
 tabpfn_extensions>=0.0.4
 hyperopt>=0.2.7
+pycaleva>=0.8.2
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/setup.py

@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:

 setup(
     name='wavetrainer',
-    version='0.1.7',
+    version='0.1.9',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/__init__.py

@@ -2,5 +2,5 @@

 from .create import create

-__VERSION__ = "0.1.7"
+__VERSION__ = "0.1.9"
 __all__ = ("create",)
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/calibrator/calibrator_router.py

@@ -5,10 +5,12 @@ import logging
 import os
 from typing import Self

+import numpy as np
 import optuna
 import pandas as pd
+from pycaleva import CalibrationEvaluator  # type: ignore

-from ..model.model import Model
+from ..model.model import PROBABILITY_COLUMN_PREFIX, Model
 from ..model_type import ModelType, determine_model_type
 from .calibrator import Calibrator
 from .vennabers_calibrator import VennabersCalibrator
@@ -26,10 +28,12 @@ class CalibratorRouter(Calibrator):
     # pylint: disable=too-many-positional-arguments,too-many-arguments

     _calibrator: Calibrator | None
+    _ce: CalibrationEvaluator | None

     def __init__(self, model: Model):
         super().__init__(model)
         self._calibrator = None
+        self._ce = None

     @classmethod
     def name(cls) -> str:
@@ -75,6 +79,14 @@ class CalibratorRouter(Calibrator):
             },
             handle,
         )
+        ce = self._ce
+        if ce is not None:
+            try:
+                ce.calibration_report(
+                    os.path.join(folder, "calibration.pdf"), "binary-classifier"
+                )
+            except ValueError as exc:
+                logging.warning(str(exc))

     def fit(
         self,
@@ -94,6 +106,25 @@ class CalibratorRouter(Calibrator):
         calibrator = VennabersCalibrator(self._model)
         calibrator.fit(df, y=y, w=w)
         self._calibrator = calibrator
+
+        pred_prob = calibrator.transform(df)
+        pred_prob = pred_prob.drop(
+            columns=[
+                x
+                for x in pred_prob.columns.values.tolist()
+                if not x.startswith(PROBABILITY_COLUMN_PREFIX)
+            ],
+            errors="ignore",
+        )
+        ce = CalibrationEvaluator(
+            y.to_numpy(),
+            np.max(pred_prob.to_numpy(), axis=1),
+            outsample=True,
+            n_groups="auto",
+        )
+        print(f"Hosmer Lemeshow: {ce.hosmerlemeshow()}")
+        self._ce = ce
+
         return self

     def transform(self, df: pd.DataFrame) -> pd.DataFrame:
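The calibrator change above wires pycaleva's CalibrationEvaluator into fit() and save(). Below is a minimal standalone sketch of that same check, using only the calls visible in this diff; the synthetic labels and probabilities are invented for illustration and are not part of wavetrainer.

```python
# Hedged sketch: exercise CalibrationEvaluator the way the new CalibratorRouter
# code does, but on synthetic data instead of calibrated model outputs.
import numpy as np
from pycaleva import CalibrationEvaluator  # type: ignore

rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=500)                    # synthetic binary targets
y_prob = 0.25 + 0.5 * y_true + 0.25 * rng.random(500)    # synthetic probabilities in (0, 1)

ce = CalibrationEvaluator(y_true, y_prob, outsample=True, n_groups="auto")
print(f"Hosmer Lemeshow: {ce.hosmerlemeshow()}")          # goodness-of-fit summary, as printed in fit()
ce.calibration_report("calibration.pdf", "binary-classifier")  # PDF report, as written in save()
```

Note that in the trainer itself the report call is wrapped in a try/except ValueError, since the evaluator can reject degenerate probability distributions.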
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/create.py

@@ -16,6 +16,7 @@ def create(
     max_train_timeout: datetime.timedelta | None = None,
     cutoff_dt: datetime.datetime | None = None,
     embedding_cols: list[list[str]] | None = None,
+    allowed_models: set[str] | None = None,
 ) -> Trainer:
     """Create a trainer."""
     return Trainer(
@@ -27,4 +28,5 @@
         max_train_timeout=max_train_timeout,
         cutoff_dt=cutoff_dt,
         embedding_cols=embedding_cols,
+        allowed_models=allowed_models,
     )
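For context, a hypothetical call to the updated create() with the new keyword. Only allowed_models is confirmed by this diff; the positional output-folder argument and the model key names ("catboost", "xgboost") are assumptions based on the package layout, not on anything shown here.

```python
import wavetrainer as wt

# Hypothetical usage: restrict the Optuna model search to a subset of models.
# The folder argument and the key names below are assumptions, not confirmed API.
trainer = wt.create(
    "wavetrainer_output",
    allowed_models={"catboost", "xgboost"},
)
# Leaving allowed_models=None (the default) keeps every registered model eligible.
```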
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/model/model_router.py

@@ -33,10 +33,13 @@ class ModelRouter(Model):
     _model: Model | None
     _false_positive_reduction_steps: int | None

-    def __init__(self) -> None:
+    def __init__(self, allowed_models: set[str] | None) -> None:
         super().__init__()
         self._model = None
         self._false_positive_reduction_steps = None
+        self._allowed_models = (
+            allowed_models if allowed_models is not None else set(_MODELS.keys())
+        )

     @classmethod
     def name(cls) -> str:
@@ -91,7 +94,12 @@ class ModelRouter(Model):
             _FALSE_POSITIVE_REDUCTION_STEPS_KEY, 0, 5
         )
         model_name = trial.suggest_categorical(
-            "model", [k for k, v in _MODELS.items() if v.supports_x(df)]
+            "model",
+            [
+                k
+                for k, v in _MODELS.items()
+                if v.supports_x(df) and k in self._allowed_models
+            ],
         )
         print(f"Using {model_name} model")
         model = _MODELS[model_name]()
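The new selection logic above simply narrows the categorical choice list handed to Optuna. Here is a self-contained sketch of that pattern; DummyModel, the _MODELS keys, and the objective are placeholders, not wavetrainer's real registry.

```python
# Stand-alone illustration of constraining trial.suggest_categorical to an
# allowed set of model names.
import optuna
import pandas as pd


class DummyModel:
    @classmethod
    def supports_x(cls, df: pd.DataFrame) -> bool:
        return True  # the real models inspect the dataframe here


_MODELS = {"catboost": DummyModel, "xgboost": DummyModel, "tabpfn": DummyModel}
allowed_models = {"catboost", "xgboost"}
df = pd.DataFrame({"feature": [1.0, 2.0, 3.0]})


def objective(trial: optuna.Trial) -> float:
    model_name = trial.suggest_categorical(
        "model",
        [k for k, v in _MODELS.items() if v.supports_x(df) and k in allowed_models],
    )
    # A real objective would construct and score _MODELS[model_name]() here.
    return 0.0


optuna.create_study().optimize(objective, n_trials=3)
```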
{wavetrainer-0.1.7 → wavetrainer-0.1.9}/wavetrainer/trainer.py

@@ -64,6 +64,7 @@ class Trainer(Fit):
         max_train_timeout: datetime.timedelta | None = None,
         cutoff_dt: datetime.datetime | None = None,
         embedding_cols: list[list[str]] | None = None,
+        allowed_models: set[str] | None = None,
     ):
         tqdm.tqdm.pandas()

@@ -155,6 +156,7 @@ class Trainer(Fit):
         self._max_train_timeout = max_train_timeout
         self._cutoff_dt = cutoff_dt
         self.embedding_cols = embedding_cols
+        self._allowed_models = allowed_models

     def _provide_study(self, column: str) -> optuna.Study:
         storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -265,7 +267,7 @@ class Trainer(Fit):
         print(f"Row weights took {time.time() - start_row_weights}")

         # Create model
-        model = ModelRouter()
+        model = ModelRouter(self._allowed_models)
         model.set_options(trial, x)

         # Train
@@ -517,7 +519,7 @@ class Trainer(Fit):
         reducer = CombinedReducer(self.embedding_cols)
         reducer.load(folder)

-        model = ModelRouter()
+        model = ModelRouter(None)
         model.load(folder)

         selector = Selector(model)
@@ -570,7 +572,7 @@ class Trainer(Fit):
         if not os.path.isdir(date_path):
             continue
         try:
-            model = ModelRouter()
+            model = ModelRouter(None)
             model.load(date_path)
             feature_importances[date_str] = model.feature_importances
         except FileNotFoundError as exc:
{wavetrainer-0.1.7 → wavetrainer-0.1.9/wavetrainer.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.7
+Version: 0.1.9
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
@@ -28,6 +28,7 @@ Requires-Dist: xgboost>=3.0.0
 Requires-Dist: jax>=0.6.1
 Requires-Dist: tabpfn_extensions>=0.0.4
 Requires-Dist: hyperopt>=0.2.7
+Requires-Dist: pycaleva>=0.8.2

 # wavetrainer

@@ -62,6 +63,7 @@ Python 3.11.6:
 - [jax](https://github.com/jax-ml/jax)
 - [tabpfn_extensions](https://github.com/PriorLabs/tabpfn-extensions)
 - [hyperopt](https://github.com/hyperopt/hyperopt)
+- [pycaleva](https://github.com/MartinWeigl/pycaleva)

 ## Raison D'être :thought_balloon:

wavetrainer-0.1.7/requirements.txt → wavetrainer-0.1.9/wavetrainer.egg-info/requires.txt

@@ -14,4 +14,5 @@ pytest-is-running>=1.5.1
 xgboost>=3.0.0
 jax>=0.6.1
 tabpfn_extensions>=0.0.4
-hyperopt>=0.2.7
+hyperopt>=0.2.7
+pycaleva>=0.8.2