wavetrainer 0.1.15.tar.gz → 0.1.16.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {wavetrainer-0.1.15/wavetrainer.egg-info → wavetrainer-0.1.16}/PKG-INFO +1 -1
  2. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/setup.py +1 -1
  3. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/create.py +2 -0
  4. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/combined_reducer.py +10 -3
  5. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/correlation_reducer.py +5 -2
  6. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/trainer.py +15 -4
  7. {wavetrainer-0.1.15 → wavetrainer-0.1.16/wavetrainer.egg-info}/PKG-INFO +1 -1
  8. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/LICENSE +0 -0
  9. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/MANIFEST.in +0 -0
  10. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/README.md +0 -0
  11. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/requirements.txt +0 -0
  12. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/setup.cfg +0 -0
  13. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/__init__.py +0 -0
  14. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/model/__init__.py +0 -0
  15. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/model/catboost_kwargs_test.py +0 -0
  16. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/tests/trainer_test.py +0 -0
  17. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/__init__.py +0 -0
  18. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/__init__.py +0 -0
  19. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/calibrator.py +0 -0
  20. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/calibrator_router.py +0 -0
  21. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  22. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/exceptions.py +0 -0
  23. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/fit.py +0 -0
  24. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/__init__.py +0 -0
  25. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/__init__.py +0 -0
  26. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  27. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  28. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_model.py +0 -0
  29. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/lightgbm/__init__.py +0 -0
  31. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/lightgbm/lightgbm_model.py +0 -0
  32. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/model.py +0 -0
  33. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/model_router.py +0 -0
  34. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/tabpfn/__init__.py +0 -0
  35. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  36. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/__init__.py +0 -0
  37. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  38. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  39. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
  40. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/model_type.py +0 -0
  41. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/params.py +0 -0
  42. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/__init__.py +0 -0
  43. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  44. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/constant_reducer.py +0 -0
  45. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  46. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  47. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  48. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/pca_reducer.py +0 -0
  49. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/reducer.py +0 -0
  50. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  51. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  52. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/reducer/unseen_reducer.py +0 -0
  53. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/selector/__init__.py +0 -0
  54. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/selector/selector.py +0 -0
  55. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/__init__.py +0 -0
  56. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/class_weights.py +0 -0
  57. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/combined_weights.py +0 -0
  58. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/exponential_weights.py +0 -0
  59. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/linear_weights.py +0 -0
  60. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/noop_weights.py +0 -0
  61. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/sigmoid_weights.py +0 -0
  62. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/weights.py +0 -0
  63. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/weights/weights_router.py +0 -0
  64. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/windower/__init__.py +0 -0
  65. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer/windower/windower.py +0 -0
  66. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/SOURCES.txt +0 -0
  67. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/dependency_links.txt +0 -0
  68. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/not-zip-safe +0 -0
  69. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/requires.txt +0 -0
  70. {wavetrainer-0.1.15 → wavetrainer-0.1.16}/wavetrainer.egg-info/top_level.txt +0 -0
wavetrainer.egg-info/PKG-INFO → PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.15
+Version: 0.1.16
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield
setup.py
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
 
 setup(
     name='wavetrainer',
-    version='0.1.15',
+    version='0.1.16',
     description='A library for automatically finding the optimal model within feature and hyperparameter space.',
     long_description=long_description,
     long_description_content_type='text/markdown',
wavetrainer/create.py
@@ -18,6 +18,7 @@ def create(
     embedding_cols: list[list[str]] | None = None,
     allowed_models: set[str] | None = None,
     max_false_positive_reduction_steps: int | None = None,
+    correlation_chunk_size: int | None = None,
 ) -> Trainer:
     """Create a trainer."""
     return Trainer(
@@ -31,4 +32,5 @@ def create(
         embedding_cols=embedding_cols,
         allowed_models=allowed_models,
         max_false_positive_reduction_steps=max_false_positive_reduction_steps,
+        correlation_chunk_size=correlation_chunk_size,
     )
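
In practice the new keyword simply flows from create() through to the Trainer. A minimal usage sketch follows; it assumes create() is re-exported from the package root, the required positional arguments of create() are not part of this diff (so "model_dir" below is only a placeholder), and the chunk size is an arbitrary example value.

import wavetrainer as wt  # assumes create() is re-exported from the package root

# "model_dir" is a placeholder for whatever required arguments create() takes;
# they are not shown in this diff.
trainer = wt.create(
    "model_dir",
    correlation_chunk_size=1000,  # forwarded to the correlation reducer
)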
wavetrainer/reducer/combined_reducer.py
@@ -31,20 +31,25 @@ class CombinedReducer(Reducer):
     # pylint: disable=too-many-positional-arguments,too-many-arguments
     _folder: str | None
 
-    def __init__(self, embedding_cols: list[list[str]] | None):
+    def __init__(
+        self, embedding_cols: list[list[str]] | None, correlation_chunk_size: int | None
+    ):
         super().__init__()
+        if correlation_chunk_size is None:
+            correlation_chunk_size = 500
         self._reducers = [
             UnseenReducer(),
             NonNumericReducer(),
             PCAReducer(embedding_cols),
             ConstantReducer(),
             DuplicateReducer(),
-            CorrelationReducer(),
+            CorrelationReducer(correlation_chunk_size=correlation_chunk_size),
             SmartCorrelationReducer(),
             # SelectBySingleFeaturePerformanceReducer(),
         ]
         self._folder = None
         self._embedding_cols = embedding_cols
+        self._correlation_chunk_size = correlation_chunk_size
 
     @classmethod
     def name(cls) -> str:
@@ -68,7 +73,9 @@ class CombinedReducer(Reducer):
         elif reducer_name == DuplicateReducer.name():
             self._reducers.append(DuplicateReducer())
         elif reducer_name == CorrelationReducer.name():
-            self._reducers.append(CorrelationReducer())
+            self._reducers.append(
+                CorrelationReducer(self._correlation_chunk_size)
+            )
         elif reducer_name == NonNumericReducer.name():
             self._reducers.append(NonNumericReducer())
         elif reducer_name == UnseenReducer.name():
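
The notable behavioral detail here is the fallback: when correlation_chunk_size is None the constructor substitutes 500 before forwarding the value to CorrelationReducer, so callers that never specify a chunk size still get a bounded default. A minimal sketch of the two call patterns, assuming the import path from the file list (the Trainer normally constructs this reducer itself):

from wavetrainer.reducer.combined_reducer import CombinedReducer

# None falls back to the default chunk size of 500 inside __init__.
reducer_default = CombinedReducer(embedding_cols=None, correlation_chunk_size=None)

# An explicit value is forwarded as CorrelationReducer(correlation_chunk_size=250).
reducer_tuned = CombinedReducer(embedding_cols=None, correlation_chunk_size=250)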
wavetrainer/reducer/correlation_reducer.py
@@ -82,9 +82,10 @@ class CorrelationReducer(Reducer):
 
     _correlation_drop_features: dict[str, bool]
 
-    def __init__(self) -> None:
+    def __init__(self, correlation_chunk_size: int) -> None:
         self._threshold = 0.0
         self._correlation_drop_features = {}
+        self._correlation_chunk_size = correlation_chunk_size
 
     @classmethod
     def name(cls) -> str:
@@ -116,7 +117,9 @@ class CorrelationReducer(Reducer):
         eval_y: pd.Series | pd.DataFrame | None = None,
     ) -> Self:
         drop_features = _get_correlated_features_to_drop_chunked(
-            df, threshold=self._threshold
+            df.copy(),
+            threshold=self._threshold,
+            chunk_size=self._correlation_chunk_size,
         )
         self._correlation_drop_features = {x: True for x in drop_features}
         return self
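
The chunked helper, _get_correlated_features_to_drop_chunked, is not shown in this diff; only its call site changes, receiving a copy of the frame and the configured chunk size. Purely as an illustration of the general technique (not the library's actual implementation), a chunked correlation-based drop can look like this:

import pandas as pd

def chunked_corr_drop_sketch(
    df: pd.DataFrame, threshold: float, chunk_size: int
) -> list[str]:
    """Greedily mark the right-hand column of each highly correlated pair,
    scanning chunk_size columns at a time to bound the correlation matrix."""
    cols = list(df.columns)
    to_drop: set[str] = set()
    for start in range(0, len(cols), chunk_size):
        chunk = cols[start : start + chunk_size]
        corr = df[chunk].corr().abs()  # at most chunk_size x chunk_size
        for i, left in enumerate(chunk):
            if left in to_drop:
                continue
            for right in chunk[i + 1 :]:
                if right not in to_drop and corr.at[left, right] > threshold:
                    to_drop.add(right)
    return sorted(to_drop)

# Note: this sketch only compares columns within a chunk; a complete
# implementation also has to account for correlations across chunk boundaries.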
wavetrainer/trainer.py
@@ -42,6 +42,7 @@ _VALIDATION_SIZE_KEY = "validation_size"
 _IDX_USR_ATTR_KEY = "idx"
 _DT_COLUMN_KEY = "dt_column"
 _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY = "max_false_positive_reduction_steps"
+_CORRELATION_CHUNK_SIZE_KEY = "correlation_chunk_size"
 _BAD_OUTPUT = -1000.0
 
 
@@ -75,6 +76,7 @@ class Trainer(Fit):
         embedding_cols: list[list[str]] | None = None,
         allowed_models: set[str] | None = None,
         max_false_positive_reduction_steps: int | None = None,
+        correlation_chunk_size: int | None = None,
     ):
         tqdm.tqdm.pandas()
 
@@ -129,6 +131,8 @@ class Trainer(Fit):
                 max_false_positive_reduction_steps = params.get(
                     _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY
                 )
+                if correlation_chunk_size is None:
+                    correlation_chunk_size = params.get(_CORRELATION_CHUNK_SIZE_KEY)
         else:
             with open(params_file, "w", encoding="utf8") as handle:
                 validation_size_value = None
@@ -160,6 +164,7 @@ class Trainer(Fit):
                         _VALIDATION_SIZE_KEY: validation_size_value,
                         _DT_COLUMN_KEY: dt_column,
                         _MAX_FALSE_POSITIVE_REDUCTION_STEPS_KEY: max_false_positive_reduction_steps,
+                        _CORRELATION_CHUNK_SIZE_KEY: correlation_chunk_size,
                     },
                     handle,
                 )
@@ -173,6 +178,7 @@ class Trainer(Fit):
         self.embedding_cols = embedding_cols
         self._allowed_models = allowed_models
         self._max_false_positive_reduction_steps = max_false_positive_reduction_steps
+        self._correlation_chunk_size = correlation_chunk_size
 
     def _provide_study(self, column: str) -> optuna.Study:
         storage_name = f"sqlite:///{self._folder}/{column}/{_STUDYDB_FILENAME}"
@@ -246,7 +252,8 @@ class Trainer(Fit):
                     "Found trial %d previously executed, skipping...",
                     trial.number,
                 )
-                return trial_info["output"]
+                return tuple(trial_info["output"])
+            print("Retraining for different trial number.")
 
             train_dt_index = dt_index[: len(x)]
             x_train = x[train_dt_index < split_idx] # type: ignore
@@ -270,7 +277,9 @@ class Trainer(Fit):
 
             # Perform common reductions
             start_reducer = time.time()
-            reducer = CombinedReducer(self.embedding_cols)
+            reducer = CombinedReducer(
+                self.embedding_cols, self._correlation_chunk_size
+            )
             reducer.set_options(trial, x)
             x_train = reducer.fit_transform(x_train, y=y_train)
             x_test = reducer.transform(x_test)
@@ -367,7 +376,7 @@ class Trainer(Fit):
                 json.dump(
                     {
                         "number": trial.number,
-                        "output": output,
+                        "output": [output, loss],
                     },
                     handle,
                 )
@@ -583,7 +592,9 @@ class Trainer(Fit):
             date_str = dates[-1].isoformat()
             folder = os.path.join(column_path, date_str)
 
-            reducer = CombinedReducer(self.embedding_cols)
+            reducer = CombinedReducer(
+                self.embedding_cols, self._correlation_chunk_size
+            )
             reducer.load(folder)
 
             model = ModelRouter(None, None)
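
Two smaller changes ride along in trainer.py: the cached trial result is now written as the list [output, loss] and converted back with tuple(...) when a previously executed trial is reused, and a print statement flags the retrain path. The tuple conversion matters because JSON has no tuple type, so the cached value round-trips as a list. A tiny sketch of that round-trip (values are hypothetical):

import json

record = {"number": 7, "output": [0.83, 0.41]}   # written by json.dump
restored = json.loads(json.dumps(record))        # read back from the cache file
cached_result = tuple(restored["output"])        # (0.83, 0.41)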
PKG-INFO → wavetrainer.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: wavetrainer
-Version: 0.1.15
+Version: 0.1.16
 Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
 Home-page: https://github.com/8W9aG/wavetrainer
 Author: Will Sackfield