wavetrainer 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {wavetrainer-0.0.6/wavetrainer.egg-info → wavetrainer-0.0.8}/PKG-INFO +1 -1
  2. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/setup.py +1 -1
  3. wavetrainer-0.0.8/tests/model/catboost_kwargs_test.py +28 -0
  4. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/tests/trainer_test.py +2 -1
  5. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/__init__.py +1 -1
  6. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/mapie_calibrator.py +1 -1
  7. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_classifier_wrap.py +1 -1
  8. wavetrainer-0.0.8/wavetrainer/model/catboost_kwargs.py +50 -0
  9. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_model.py +9 -4
  10. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/catboost_regressor_wrap.py +1 -1
  11. wavetrainer-0.0.8/wavetrainer/weights/__init__.py +0 -0
  12. {wavetrainer-0.0.6 → wavetrainer-0.0.8/wavetrainer.egg-info}/PKG-INFO +1 -1
  13. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/SOURCES.txt +2 -0
  14. wavetrainer-0.0.6/wavetrainer/model/catboost_kwargs.py +0 -35
  15. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/LICENSE +0 -0
  16. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/MANIFEST.in +0 -0
  17. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/README.md +0 -0
  18. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/requirements.txt +0 -0
  19. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/setup.cfg +0 -0
  20. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/tests/__init__.py +0 -0
  21. {wavetrainer-0.0.6/wavetrainer/weights → wavetrainer-0.0.8/tests/model}/__init__.py +0 -0
  22. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/__init__.py +0 -0
  23. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/calibrator.py +0 -0
  24. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/calibrator_router.py +0 -0
  25. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  26. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/create.py +0 -0
  27. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/exceptions.py +0 -0
  28. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/fit.py +0 -0
  29. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/__init__.py +0 -0
  30. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/model.py +0 -0
  31. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model/model_router.py +0 -0
  32. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/model_type.py +0 -0
  33. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/params.py +0 -0
  34. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/__init__.py +0 -0
  35. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  36. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/combined_reducer.py +0 -0
  37. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/constant_reducer.py +0 -0
  38. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/correlation_reducer.py +0 -0
  39. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  40. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  41. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/reducer/reducer.py +0 -0
  42. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/selector/__init__.py +0 -0
  43. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/selector/selector.py +0 -0
  44. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/trainer.py +0 -0
  45. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/class_weights.py +0 -0
  46. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/combined_weights.py +0 -0
  47. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/exponential_weights.py +0 -0
  48. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/linear_weights.py +0 -0
  49. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/noop_weights.py +0 -0
  50. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/sigmoid_weights.py +0 -0
  51. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/weights.py +0 -0
  52. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/weights/weights_router.py +0 -0
  53. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/windower/__init__.py +0 -0
  54. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer/windower/windower.py +0 -0
  55. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/dependency_links.txt +0 -0
  56. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/not-zip-safe +0 -0
  57. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/requires.txt +0 -0
  58. {wavetrainer-0.0.6 → wavetrainer-0.0.8}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.0.6',
26
+ version='0.0.8',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -0,0 +1,28 @@
1
+ """Tests for the catboost kwargs handler class."""
2
+ import unittest
3
+
4
+ import pandas as pd
5
+
6
+ from wavetrainer.model.catboost_kwargs import handle_fit_kwargs
7
+
8
+
9
+ class TestCatboostKwargs(unittest.TestCase):
10
+
11
+ def test_handle_fit_kwargs(self):
12
+ x_train = pd.DataFrame(data={
13
+ "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
14
+ })
15
+ x_train["thing"] = x_train["thing"].astype('category')
16
+ y_train = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
17
+ x_test = pd.DataFrame(data={
18
+ "thing": [0.0, 1.0, 2.0, 3.0, 4.0],
19
+ })
20
+ x_test["thing"] = x_test["thing"].astype('category')
21
+ y_test = pd.Series(data=[1.0, 2.0, 3.0, 4.0])
22
+ args, _ = handle_fit_kwargs(
23
+ x_train,
24
+ y_train,
25
+ eval_set=(x_test, y_test),
26
+ cat_features=x_train.select_dtypes(include="category").columns.tolist(),
27
+ )
28
+ assert len(args) == 2
@@ -20,10 +20,11 @@ class TestTrainer(unittest.TestCase):
20
20
  data={
21
21
  "column1": x_data,
22
22
  "column2": [(x * random.random()) + random.random() for x in x_data],
23
- "column3": [(x / random.random()) - random.random() for x in x_data],
23
+ "column3": [int(((x / random.random()) - random.random()) * 1000.0) for x in x_data],
24
24
  },
25
25
  index=x_index,
26
26
  )
27
+ df["column3"] = df["column3"].astype('category')
27
28
  y = pd.DataFrame(
28
29
  data={
29
30
  "y": [x % 2 == 0 for x in x_data],
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.0.6"
5
+ __VERSION__ = "0.0.8"
6
6
  __all__ = ("create",)
@@ -60,7 +60,7 @@ class MAPIECalibrator(Calibrator):
60
60
  try:
61
61
  alpha = []
62
62
  for potential_alpha in [0.05, 0.32]:
63
- if len(df) > int(1.0 / potential_alpha):
63
+ if len(df) > int(1.0 / potential_alpha) + 1:
64
64
  alpha.append(potential_alpha)
65
65
  if alpha:
66
66
  _, y_pis = self._mapie.predict(df, alpha=alpha)
@@ -11,5 +11,5 @@ class CatBoostClassifierWrapper(CatBoostClassifier):
11
11
  """A wrapper for the catboost classifier."""
12
12
 
13
13
  def fit(self, *args, **kwargs):
14
- kwargs = handle_fit_kwargs(*args, **kwargs)
14
+ args, kwargs = handle_fit_kwargs(*args, **kwargs)
15
15
  return super().fit(*args, **kwargs)
@@ -0,0 +1,50 @@
1
+ """A list of constant catboost kwargs."""
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ from catboost import Pool # type: ignore
7
+
8
+ ORIGINAL_X_ARG_KEY = "original_x"
9
+ EVAL_SET_ARG_KEY = "eval_set"
10
+ CAT_FEATURES_ARG_KEY = "cat_features"
11
+
12
+
13
+ def handle_fit_kwargs(*args, **kwargs) -> tuple[tuple[Any, ...], dict[str, Any]]:
14
+ """Handles keyword args coming into a catboost fit method."""
15
+ if ORIGINAL_X_ARG_KEY in kwargs:
16
+ df = kwargs[ORIGINAL_X_ARG_KEY]
17
+ eval_x, eval_y = kwargs[EVAL_SET_ARG_KEY]
18
+ cat_features = kwargs[CAT_FEATURES_ARG_KEY]
19
+ args_list = list(args)
20
+ fit_x = args_list[0]
21
+ fix_x_cp = fit_x.copy()
22
+
23
+ # Stupid code to ensure eval is feature equivalent to train data
24
+ included_columns = []
25
+ for i in range(fix_x_cp.shape[1]):
26
+ arr_col_values = fix_x_cp[:, i]
27
+ for col in df.columns:
28
+ if col in included_columns:
29
+ continue
30
+ df_col_values = df[col].values
31
+ if np.allclose(df_col_values, arr_col_values, equal_nan=True):
32
+ included_columns.append(col)
33
+ break
34
+ # We also need to update cat_features or catboost will yell at us
35
+ cat_features = list(
36
+ set(list(kwargs.get(CAT_FEATURES_ARG_KEY, []))) & set(included_columns)
37
+ )
38
+ args_list[0] = df[included_columns]
39
+ args = tuple(args_list)
40
+
41
+ eval_x = eval_x[included_columns]
42
+ kwargs[EVAL_SET_ARG_KEY] = Pool(
43
+ eval_x,
44
+ label=eval_y,
45
+ cat_features=cat_features,
46
+ )
47
+ kwargs[CAT_FEATURES_ARG_KEY] = cat_features
48
+
49
+ del kwargs[ORIGINAL_X_ARG_KEY]
50
+ return args, kwargs
@@ -10,7 +10,8 @@ from catboost import CatBoost, Pool # type: ignore
10
10
 
11
11
  from ..model_type import ModelType, determine_model_type
12
12
  from .catboost_classifier_wrap import CatBoostClassifierWrapper
13
- from .catboost_kwargs import EVAL_SET, ORIGINAL_X
13
+ from .catboost_kwargs import (CAT_FEATURES_ARG_KEY, EVAL_SET_ARG_KEY,
14
+ ORIGINAL_X_ARG_KEY)
14
15
  from .catboost_regressor_wrap import CatBoostRegressorWrapper
15
16
  from .model import PREDICTION_COLUMN, PROBABILITY_COLUMN_PREFIX, Model
16
17
 
@@ -66,9 +67,9 @@ class CatboostModel(Model):
66
67
  raise ValueError("y is null.")
67
68
  self._model_type = determine_model_type(y)
68
69
  return {
69
- EVAL_SET: (eval_x, eval_y),
70
- "cat_features": df.select_dtypes(include="category").columns.tolist(),
71
- ORIGINAL_X: df,
70
+ EVAL_SET_ARG_KEY: (eval_x, eval_y),
71
+ CAT_FEATURES_ARG_KEY: df.select_dtypes(include="category").columns.tolist(),
72
+ ORIGINAL_X_ARG_KEY: df,
72
73
  }
73
74
 
74
75
  def set_options(self, trial: optuna.Trial | optuna.trial.FrozenTrial) -> None:
@@ -122,6 +123,8 @@ class CatboostModel(Model):
122
123
  ) -> Self:
123
124
  if y is None:
124
125
  raise ValueError("y is null.")
126
+ if eval_x is None:
127
+ raise ValueError("eval_x is null.")
125
128
  self._model_type = determine_model_type(y)
126
129
  catboost = self._provide_catboost()
127
130
 
@@ -129,10 +132,12 @@ class CatboostModel(Model):
129
132
  df,
130
133
  label=y,
131
134
  weight=w,
135
+ cat_features=df.select_dtypes(include="category").columns.tolist(),
132
136
  )
133
137
  eval_pool = Pool(
134
138
  eval_x,
135
139
  label=eval_y,
140
+ cat_features=eval_x.select_dtypes(include="category").columns.tolist(),
136
141
  )
137
142
  catboost.fit(
138
143
  train_pool,
@@ -9,5 +9,5 @@ class CatBoostRegressorWrapper(CatBoostRegressor):
9
9
  """A wrapper for the catboost regressor."""
10
10
 
11
11
  def fit(self, *args, **kwargs):
12
- kwargs = handle_fit_kwargs(*args, **kwargs)
12
+ args, kwargs = handle_fit_kwargs(*args, **kwargs)
13
13
  return super().fit(*args, **kwargs)
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -5,6 +5,8 @@ requirements.txt
5
5
  setup.py
6
6
  tests/__init__.py
7
7
  tests/trainer_test.py
8
+ tests/model/__init__.py
9
+ tests/model/catboost_kwargs_test.py
8
10
  wavetrainer/__init__.py
9
11
  wavetrainer/create.py
10
12
  wavetrainer/exceptions.py
@@ -1,35 +0,0 @@
1
- """A list of constant catboost kwargs."""
2
-
3
- from typing import Any
4
-
5
- import numpy as np
6
- from catboost import Pool # type: ignore
7
-
8
- ORIGINAL_X = "original_x"
9
- EVAL_SET = "eval_set"
10
-
11
-
12
- def handle_fit_kwargs(*args, **kwargs) -> dict[str, Any]:
13
- """Handles keyword args coming into a catboost fit method."""
14
- if ORIGINAL_X in kwargs:
15
- df = kwargs[ORIGINAL_X]
16
- eval_x, eval_y = kwargs[EVAL_SET]
17
- fit_x = args[0]
18
- fix_x_cp = fit_x.copy()
19
-
20
- # Stupid code to ensure eval is feature equivalent to train data
21
- included_columns = []
22
- for i in range(fix_x_cp.shape[1]):
23
- arr_col_values = fix_x_cp[:, i]
24
- for col in df.columns:
25
- df_col_values = df[col].values
26
- if np.allclose(df_col_values, arr_col_values, equal_nan=True):
27
- included_columns.append(col)
28
- df = df.drop(col, axis=1)
29
- break
30
-
31
- eval_x = eval_x[included_columns]
32
- kwargs[EVAL_SET] = Pool(eval_x, label=eval_y)
33
-
34
- del kwargs[ORIGINAL_X]
35
- return kwargs
File without changes
File without changes
File without changes
File without changes