wavetrainer 0.1.11__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {wavetrainer-0.1.11/wavetrainer.egg-info → wavetrainer-0.1.12}/PKG-INFO +3 -1
  2. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/README.md +1 -0
  3. wavetrainer-0.1.11/wavetrainer.egg-info/requires.txt → wavetrainer-0.1.12/requirements.txt +1 -0
  4. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/setup.py +1 -1
  5. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/__init__.py +1 -1
  6. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/lightgbm/lightgbm_model.py +1 -1
  7. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/combined_reducer.py +1 -2
  8. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/trainer.py +69 -12
  9. {wavetrainer-0.1.11 → wavetrainer-0.1.12/wavetrainer.egg-info}/PKG-INFO +3 -1
  10. wavetrainer-0.1.11/requirements.txt → wavetrainer-0.1.12/wavetrainer.egg-info/requires.txt +2 -1
  11. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/LICENSE +0 -0
  12. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/MANIFEST.in +0 -0
  13. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/setup.cfg +0 -0
  14. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/__init__.py +0 -0
  15. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/model/__init__.py +0 -0
  16. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/model/catboost_kwargs_test.py +0 -0
  17. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/tests/trainer_test.py +0 -0
  18. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/__init__.py +0 -0
  19. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/calibrator.py +0 -0
  20. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/calibrator_router.py +0 -0
  21. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  22. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/create.py +0 -0
  23. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/exceptions.py +0 -0
  24. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/fit.py +0 -0
  25. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/__init__.py +0 -0
  26. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/__init__.py +0 -0
  27. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  28. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  29. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_model.py +0 -0
  30. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  31. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/lightgbm/__init__.py +0 -0
  32. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/model.py +0 -0
  33. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/model_router.py +0 -0
  34. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/tabpfn/__init__.py +0 -0
  35. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  36. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/__init__.py +0 -0
  37. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  38. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  39. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
  40. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/model_type.py +0 -0
  41. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/params.py +0 -0
  42. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/__init__.py +0 -0
  43. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  44. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/constant_reducer.py +0 -0
  45. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/correlation_reducer.py +0 -0
  46. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  47. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  48. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  49. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/pca_reducer.py +0 -0
  50. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/reducer.py +0 -0
  51. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  52. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  53. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/reducer/unseen_reducer.py +0 -0
  54. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/selector/__init__.py +0 -0
  55. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/selector/selector.py +0 -0
  56. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/__init__.py +0 -0
  57. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/class_weights.py +0 -0
  58. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/combined_weights.py +0 -0
  59. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/exponential_weights.py +0 -0
  60. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/linear_weights.py +0 -0
  61. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/noop_weights.py +0 -0
  62. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/sigmoid_weights.py +0 -0
  63. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/weights.py +0 -0
  64. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/weights/weights_router.py +0 -0
  65. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/windower/__init__.py +0 -0
  66. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer/windower/windower.py +0 -0
  67. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/SOURCES.txt +0 -0
  68. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/dependency_links.txt +0 -0
  69. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/not-zip-safe +0 -0
  70. {wavetrainer-0.1.11 → wavetrainer-0.1.12}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -30,6 +30,7 @@ Requires-Dist: tabpfn_extensions>=0.0.4
30
30
  Requires-Dist: hyperopt>=0.2.7
31
31
  Requires-Dist: pycaleva>=0.8.2
32
32
  Requires-Dist: lightgbm>=4.6.0
33
+ Requires-Dist: kaleido>=0.2.1
33
34
 
34
35
  # wavetrainer
35
36
 
@@ -66,6 +67,7 @@ Python 3.11.6:
66
67
  - [hyperopt](https://github.com/hyperopt/hyperopt)
67
68
  - [pycaleva](https://github.com/MartinWeigl/pycaleva)
68
69
  - [lightgbm](https://github.com/microsoft/LightGBM)
70
+ - [kaleido](https://github.com/plotly/Kaleido)
69
71
 
70
72
  ## Raison D'être :thought_balloon:
71
73
 
@@ -33,6 +33,7 @@ Python 3.11.6:
33
33
  - [hyperopt](https://github.com/hyperopt/hyperopt)
34
34
  - [pycaleva](https://github.com/MartinWeigl/pycaleva)
35
35
  - [lightgbm](https://github.com/microsoft/LightGBM)
36
+ - [kaleido](https://github.com/plotly/Kaleido)
36
37
 
37
38
  ## Raison D'être :thought_balloon:
38
39
 
@@ -17,3 +17,4 @@ tabpfn_extensions>=0.0.4
17
17
  hyperopt>=0.2.7
18
18
  pycaleva>=0.8.2
19
19
  lightgbm>=4.6.0
20
+ kaleido>=0.2.1
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.1.11',
26
+ version='0.1.12',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.1.11"
5
+ __VERSION__ = "0.1.12"
6
6
  __all__ = ("create",)
@@ -148,7 +148,7 @@ class LightGBMModel(Model):
148
148
 
149
149
  eval_set = None
150
150
  callbacks = []
151
- if eval_x is None or eval_y is None:
151
+ if eval_x is not None and eval_y is not None:
152
152
  eval_set = [(eval_x, eval_y.to_numpy().flatten())] # type: ignore
153
153
  callbacks = [
154
154
  lgb.early_stopping(stopping_rounds=early_stopping_rounds),
@@ -2,7 +2,6 @@
2
2
 
3
3
  # pylint: disable=line-too-long
4
4
  import json
5
- import logging
6
5
  import os
7
6
  import time
8
7
  from typing import Self
@@ -129,6 +128,6 @@ class CombinedReducer(Reducer):
129
128
  try:
130
129
  df = reducer.transform(df)
131
130
  except ValueError as exc:
132
- logging.warning("Failed to reduce %s", reducer.name())
131
+ print("Failed to reduce %s", reducer.name())
133
132
  raise exc
134
133
  return df
@@ -1,5 +1,6 @@
1
1
  """The trainer class."""
2
2
 
3
+ # pylint: disable=line-too-long
3
4
  import datetime
4
5
  import functools
5
6
  import json
@@ -12,12 +13,14 @@ from typing import Self
12
13
  import optuna
13
14
  import pandas as pd
14
15
  import tqdm
15
- from sklearn.metrics import f1_score, r2_score # type: ignore
16
+ from sklearn.metrics import f1_score # type: ignore
17
+ from sklearn.metrics import (accuracy_score, brier_score_loss, log_loss,
18
+ precision_score, r2_score, recall_score)
16
19
 
17
20
  from .calibrator.calibrator_router import CalibratorRouter
18
21
  from .exceptions import WavetrainException
19
22
  from .fit import Fit
20
- from .model.model import PREDICTION_COLUMN
23
+ from .model.model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX
21
24
  from .model.model_router import ModelRouter
22
25
  from .model_type import ModelType, determine_model_type
23
26
  from .reducer.combined_reducer import CombinedReducer
@@ -48,6 +51,11 @@ def _assign_bin(timestamp, bins: list[datetime.datetime]) -> int:
48
51
  return len(bins) - 2 # Assign to last bin if at the end
49
52
 
50
53
 
54
+ def _best_trial(study: optuna.Study) -> optuna.trial.FrozenTrial:
55
+ best_brier = min(study.best_trials, key=lambda t: t.values[1])
56
+ return best_brier
57
+
58
+
51
59
  class Trainer(Fit):
52
60
  """A class for training and predicting from an array of data."""
53
61
 
@@ -170,7 +178,10 @@ class Trainer(Fit):
170
178
  storage=storage_name,
171
179
  load_if_exists=True,
172
180
  sampler=restored_sampler,
173
- direction=optuna.study.StudyDirection.MAXIMIZE,
181
+ directions=[
182
+ optuna.study.StudyDirection.MAXIMIZE,
183
+ optuna.study.StudyDirection.MINIMIZE,
184
+ ],
174
185
  )
175
186
 
176
187
  def fit(
@@ -210,7 +221,7 @@ class Trainer(Fit):
210
221
  save: bool,
211
222
  split_idx: datetime.datetime,
212
223
  no_evaluation: bool,
213
- ) -> float:
224
+ ) -> tuple[float, float]:
214
225
  print(f"Beginning trial for: {split_idx.isoformat()}")
215
226
  trial.set_user_attr(_IDX_USR_ATTR_KEY, split_idx.isoformat())
216
227
  folder = os.path.join(
@@ -246,7 +257,7 @@ class Trainer(Fit):
246
257
  if new_folder:
247
258
  os.removedirs(folder)
248
259
  logging.warning("Y train only contains 1 unique datapoint.")
249
- return _BAD_OUTPUT
260
+ return _BAD_OUTPUT, -_BAD_OUTPUT
250
261
  print(f"Windowing took {time.time() - start_windower}")
251
262
 
252
263
  # Perform common reductions
@@ -311,10 +322,29 @@ class Trainer(Fit):
311
322
  )
312
323
  cal_pred[PREDICTION_COLUMN] = y_pred[PREDICTION_COLUMN]
313
324
  output = 0.0
325
+ loss = 0.0
314
326
  if determine_model_type(y_series) == ModelType.REGRESSION:
315
327
  output = float(r2_score(y_test, y_pred[[PREDICTION_COLUMN]]))
328
+ print(f"R2: {output}")
316
329
  else:
317
330
  output = float(f1_score(y_test, y_pred[[PREDICTION_COLUMN]]))
331
+ print(f"F1: {output}")
332
+ prob_col = PROBABILITY_COLUMN_PREFIX + str(1)
333
+ if prob_col in y_pred.columns.values.tolist():
334
+ loss = float(brier_score_loss(y_test, y_pred[[prob_col]]))
335
+ print(f"Brier: {loss}")
336
+ print(
337
+ f"Log Loss: {float(log_loss(y_test.astype(float), y_pred[[prob_col]]))}"
338
+ )
339
+ print(
340
+ f"Accuracy: {float(accuracy_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
341
+ )
342
+ print(
343
+ f"Precision: {float(precision_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
344
+ )
345
+ print(
346
+ f"Recall: {float(recall_score(y_test, y_pred[[PREDICTION_COLUMN]]))}"
347
+ )
318
348
 
319
349
  if save:
320
350
  windower.save(folder, trial)
@@ -332,13 +362,13 @@ class Trainer(Fit):
332
362
  handle,
333
363
  )
334
364
 
335
- return output
365
+ return output, loss
336
366
  except WavetrainException as exc:
337
367
  print(str(exc))
338
368
  logging.warning(str(exc))
339
369
  if new_folder:
340
370
  os.removedirs(folder)
341
- return _BAD_OUTPUT
371
+ return _BAD_OUTPUT, -_BAD_OUTPUT
342
372
 
343
373
  start_validation_index = (
344
374
  dt_index.to_list()[-int(len(dt_index) * self._validation_size) - 1]
@@ -359,7 +389,7 @@ class Trainer(Fit):
359
389
  ].to_list()[0]
360
390
  )
361
391
 
362
- def test_objective(trial: optuna.Trial) -> float:
392
+ def test_objective(trial: optuna.Trial) -> tuple[float, float]:
363
393
  return _fit(
364
394
  trial,
365
395
  test_df,
@@ -382,7 +412,8 @@ class Trainer(Fit):
382
412
  else self._max_train_timeout.total_seconds(),
383
413
  )
384
414
  while (
385
- study.best_trial.value is None or study.best_trial.value == _BAD_OUTPUT
415
+ _best_trial(study).values is None
416
+ or _best_trial(study).values == (_BAD_OUTPUT, -_BAD_OUTPUT)
386
417
  ) and len(study.trials) < 1000:
387
418
  logging.info("Performing extra train")
388
419
  study.optimize(
@@ -420,7 +451,7 @@ class Trainer(Fit):
420
451
  if found:
421
452
  last_processed_dt = test_dt
422
453
  _fit(
423
- study.best_trial,
454
+ _best_trial(study),
424
455
  test_df.copy(),
425
456
  test_series,
426
457
  True,
@@ -441,7 +472,7 @@ class Trainer(Fit):
441
472
 
442
473
  def validate_objctive(
443
474
  trial: optuna.Trial, idx: datetime.datetime, series: pd.Series
444
- ) -> float:
475
+ ) -> tuple[float, float]:
445
476
  return _fit(trial, test_df.copy(), series, False, idx, False)
446
477
 
447
478
  study.optimize(
@@ -457,10 +488,36 @@ class Trainer(Fit):
457
488
  break
458
489
 
459
490
  _fit(
460
- study.best_trial, test_df.copy(), test_series, True, test_idx, True
491
+ _best_trial(study),
492
+ test_df.copy(),
493
+ test_series,
494
+ True,
495
+ test_idx,
496
+ True,
461
497
  )
462
498
  last_processed_dt = test_idx
463
499
 
500
+ target_names = ["F1", "Brier"]
501
+ fig = optuna.visualization.plot_pareto_front(
502
+ study, target_names=target_names
503
+ )
504
+ fig.write_image(
505
+ os.path.join(column_dir, "pareto_frontier.png"),
506
+ format="png",
507
+ width=800,
508
+ height=600,
509
+ )
510
+ for target_name in target_names:
511
+ fig = optuna.visualization.plot_param_importances(
512
+ study, target=lambda t: t.values[0], target_name=target_name
513
+ )
514
+ fig.write_image(
515
+ os.path.join(column_dir, f"{target_name}_frontier.png"),
516
+ format="png",
517
+ width=800,
518
+ height=600,
519
+ )
520
+
464
521
  if isinstance(y, pd.Series):
465
522
  _fit_column(y)
466
523
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -30,6 +30,7 @@ Requires-Dist: tabpfn_extensions>=0.0.4
30
30
  Requires-Dist: hyperopt>=0.2.7
31
31
  Requires-Dist: pycaleva>=0.8.2
32
32
  Requires-Dist: lightgbm>=4.6.0
33
+ Requires-Dist: kaleido>=0.2.1
33
34
 
34
35
  # wavetrainer
35
36
 
@@ -66,6 +67,7 @@ Python 3.11.6:
66
67
  - [hyperopt](https://github.com/hyperopt/hyperopt)
67
68
  - [pycaleva](https://github.com/MartinWeigl/pycaleva)
68
69
  - [lightgbm](https://github.com/microsoft/LightGBM)
70
+ - [kaleido](https://github.com/plotly/Kaleido)
69
71
 
70
72
  ## Raison D'être :thought_balloon:
71
73
 
@@ -16,4 +16,5 @@ jax>=0.6.1
16
16
  tabpfn_extensions>=0.0.4
17
17
  hyperopt>=0.2.7
18
18
  pycaleva>=0.8.2
19
- lightgbm>=4.6.0
19
+ lightgbm>=4.6.0
20
+ kaleido>=0.2.1
File without changes
File without changes
File without changes