wavetrainer 0.3.16__tar.gz → 0.3.18__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (79)
  1. {wavetrainer-0.3.16/wavetrainer.egg-info → wavetrainer-0.3.18}/PKG-INFO +1 -1
  2. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/setup.py +1 -1
  3. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/__init__.py +1 -1
  4. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_model.py +1 -1
  5. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/trainer.py +67 -25
  6. {wavetrainer-0.3.16 → wavetrainer-0.3.18/wavetrainer.egg-info}/PKG-INFO +1 -1
  7. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/LICENSE +0 -0
  8. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/MANIFEST.in +0 -0
  9. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/README.md +0 -0
  10. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/requirements.txt +0 -0
  11. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/setup.cfg +0 -0
  12. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/__init__.py +0 -0
  13. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/model/__init__.py +0 -0
  14. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/model/catboost_kwargs_test.py +0 -0
  15. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/trainer_test.py +0 -0
  16. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/__init__.py +0 -0
  17. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/calibrator.py +0 -0
  18. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/calibrator_router.py +0 -0
  19. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/conformal_quantile_calibration.py +0 -0
  20. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
  21. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/create.py +0 -0
  22. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/crps.py +0 -0
  23. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/exceptions.py +0 -0
  24. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/fit.py +0 -0
  25. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/__init__.py +0 -0
  26. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/__init__.py +0 -0
  27. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
  28. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
  29. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
  30. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/lightgbm/__init__.py +0 -0
  31. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/lightgbm/lightgbm_model.py +0 -0
  32. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/model.py +0 -0
  33. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/model_router.py +0 -0
  34. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/tabpfn/__init__.py +0 -0
  35. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
  36. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/__init__.py +0 -0
  37. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/early_stopper.py +0 -0
  38. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
  39. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
  40. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model_type.py +0 -0
  41. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/__init__.py +0 -0
  42. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/combined_normaliser.py +0 -0
  43. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/normaliser.py +0 -0
  44. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/powertransformer_normaliser.py +0 -0
  45. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/params.py +0 -0
  46. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/__init__.py +0 -0
  47. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/base_selector_reducer.py +0 -0
  48. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/combined_reducer.py +0 -0
  49. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/constant_reducer.py +0 -0
  50. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/correlation_reducer.py +0 -0
  51. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/duplicate_reducer.py +0 -0
  52. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/fast_correlation_based_reducer.py +0 -0
  53. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
  54. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
  55. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/pca_reducer.py +0 -0
  56. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/reducer.py +0 -0
  57. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
  58. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
  59. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/unseen_reducer.py +0 -0
  60. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/selector/__init__.py +0 -0
  61. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/selector/selector.py +0 -0
  62. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/stratified_brier_score_loss.py +0 -0
  63. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/threshold_callback.py +0 -0
  64. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/__init__.py +0 -0
  65. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/class_weights.py +0 -0
  66. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/combined_weights.py +0 -0
  67. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/exponential_weights.py +0 -0
  68. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/linear_weights.py +0 -0
  69. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/noop_weights.py +0 -0
  70. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/sigmoid_weights.py +0 -0
  71. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/weights.py +0 -0
  72. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/weights_router.py +0 -0
  73. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/windower/__init__.py +0 -0
  74. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/windower/windower.py +0 -0
  75. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/SOURCES.txt +0 -0
  76. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/dependency_links.txt +0 -0
  77. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/not-zip-safe +0 -0
  78. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/requires.txt +0 -0
  79. {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.3.16
3
+ Version: 0.3.18
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
23
23
 
24
24
  setup(
25
25
  name='wavetrainer',
26
- version='0.3.16',
26
+ version='0.3.18',
27
27
  description='A library for automatically finding the optimal model within feature and hyperparameter space.',
28
28
  long_description=long_description,
29
29
  long_description_content_type='text/markdown',
@@ -2,5 +2,5 @@
2
2
 
3
3
  from .create import create
4
4
 
5
- __VERSION__ = "0.3.16"
5
+ __VERSION__ = "0.3.18"
6
6
  __all__ = ("create",)
@@ -269,7 +269,7 @@ class CatboostModel(Model):
269
269
  cat_features=df.select_dtypes(include="category").columns.tolist(),
270
270
  )
271
271
  catboost = self._provide_catboost()
272
- pred = catboost.predict(pred_pool)
272
+ pred = catboost.predict(pred_pool, thread_count=1)
273
273
  if self._model_type == ModelType.QUANTILE_REGRESSION:
274
274
  df = pd.DataFrame(
275
275
  index=df.index,
@@ -14,6 +14,7 @@ import numpy as np
14
14
  import optuna
15
15
  import pandas as pd
16
16
  import tqdm
17
+ from joblib import Parallel, delayed # type: ignore
17
18
  from sklearn.metrics import f1_score # type: ignore
18
19
  from sklearn.metrics import (accuracy_score, brier_score_loss, log_loss,
19
20
  precision_score, r2_score, recall_score)
@@ -676,10 +677,13 @@ class Trainer(Fit):
676
677
  return self
677
678
 
678
679
  def transform(
679
- self, df: pd.DataFrame, optimistic: bool = False, ignore_no_dates: bool = False
680
+ self,
681
+ df: pd.DataFrame,
682
+ optimistic: bool = False,
683
+ ignore_no_dates: bool = False,
684
+ n_jobs: int = -1,
680
685
  ) -> pd.DataFrame:
681
- """Predict the expected values of the data."""
682
- # tqdm.tqdm.pandas(desc="Inferring...")
686
+ """Predict the expected values of the data running columns in parallel."""
683
687
  input_df = df.copy()
684
688
  df = df.reindex(sorted(df.columns), axis=1)
685
689
  feature_columns = df.columns.values
@@ -689,22 +693,24 @@ class Trainer(Fit):
689
693
  else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
690
694
  )
691
695
 
692
- for column in os.listdir(self._folder):
696
+ # --- INNER FUNCTION 1: Process a single column directory ---
697
+ def process_single_column(column: str):
693
698
  column_path = os.path.join(self._folder, column)
694
699
  if not os.path.isdir(column_path):
695
- continue
700
+ return None
701
+
696
702
  dates = []
697
703
  for date_str in os.listdir(column_path):
698
704
  date_path = os.path.join(column_path, date_str)
699
- if not os.path.isdir(date_path):
700
- continue
701
- if not os.listdir(date_path):
705
+ if not os.path.isdir(date_path) or not os.listdir(date_path):
702
706
  continue
703
707
  dates.append(datetime.datetime.fromisoformat(date_str))
708
+
704
709
  if not dates:
705
710
  if ignore_no_dates:
706
- continue
711
+ return None
707
712
  raise ValueError(f"no dates found for {column}.")
713
+
708
714
  dates = sorted(dates)
709
715
  bins: list[datetime.datetime] = sorted(
710
716
  [dt_index.min().to_pydatetime()]
@@ -712,12 +718,15 @@ class Trainer(Fit):
712
718
  + [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
713
719
  )
714
720
 
721
+ # --- INNER FUNCTION 2: The actual prediction logic ---
715
722
  def perform_predictions(
716
723
  group: pd.DataFrame,
717
724
  column_path: str,
718
725
  column: str,
719
726
  dates: list[datetime.datetime],
720
727
  ) -> pd.DataFrame:
728
+ group = group.copy() # Avoid SettingWithCopyWarning in parallel threads
729
+
721
730
  group_dt_index = (
722
731
  group.index
723
732
  if self._dt_column is None
@@ -740,7 +749,6 @@ class Trainer(Fit):
740
749
  filtered_dates = [dates[-1]]
741
750
  date_str = filtered_dates[-1].isoformat()
742
751
  folder = os.path.join(column_path, date_str)
743
- # print(f"Loading {folder}")
744
752
 
745
753
  try:
746
754
  reducer = self._provide_reducer(folder)
@@ -756,24 +764,27 @@ class Trainer(Fit):
756
764
  y_pred = calibrator.transform(
757
765
  y_pred if calibrator.predictions_as_x(None) else x_pred
758
766
  )
767
+
759
768
  for new_column in y_pred.columns.values:
760
769
  group["_".join([column, new_column])] = y_pred[new_column]
761
- except FileNotFoundError as exc:
762
- print(f"Model {folder} failed:")
763
- print(str(exc))
764
- except AttributeError as exc:
765
- print(f"Model {folder} failed:")
766
- print(str(exc))
767
- raise exc
770
+
771
+ except (FileNotFoundError, AttributeError) as exc:
772
+ # We keep this error print so you don't lose debugging info,
773
+ # but removed the standard progress print to keep tqdm clean.
774
+ print(f"\nModel {folder} failed:\n{str(exc)}")
775
+ if isinstance(exc, AttributeError):
776
+ raise exc
768
777
 
769
778
  return group
770
779
 
780
+ # Apply the grouped predictions
771
781
  old_index = dt_index.copy()
772
782
  df_group = df.groupby(
773
783
  dt_index.map(functools.partial(_assign_bin, bins=bins))
774
784
  )
785
+
775
786
  if len(df_group) == 1:
776
- df = df_group.apply( # type: ignore
787
+ df_out = df_group.apply(
777
788
  functools.partial(
778
789
  perform_predictions,
779
790
  column_path=column_path,
@@ -782,7 +793,8 @@ class Trainer(Fit):
782
793
  )
783
794
  )
784
795
  else:
785
- df = df_group.progress_apply( # type: ignore
796
+ # Using standard apply to avoid messy parallel tqdm console output
797
+ df_out = df_group.apply(
786
798
  functools.partial(
787
799
  perform_predictions,
788
800
  column_path=column_path,
@@ -790,15 +802,45 @@ class Trainer(Fit):
790
802
  dates=dates,
791
803
  )
792
804
  )
805
+
793
806
  if self._dt_column is None:
794
- df = df.set_index(old_index)
807
+ df_out = df_out.set_index(old_index)
808
+
809
+ if isinstance(df_out.index, pd.MultiIndex):
810
+ df_out = df_out.droplevel(0)
811
+
812
+ # EXTRACT: Only return the new prediction columns to prevent heavy IPC memory overhead
813
+ new_cols = [c for c in df_out.columns if c not in df.columns]
814
+ return df_out[new_cols]
815
+
816
+ # --- PARALLEL EXECUTION ---
817
+ columns_to_process = os.listdir(self._folder)
818
+
819
+ # Wrap the Parallel call in tqdm and set return_as="generator"
820
+ parallel_results = list(
821
+ tqdm.tqdm(
822
+ Parallel(n_jobs=n_jobs, return_as="generator")(
823
+ delayed(process_single_column)(column)
824
+ for column in columns_to_process
825
+ ),
826
+ total=len(columns_to_process),
827
+ desc="Inferring Columns",
828
+ )
829
+ )
830
+
831
+ # Filter out skipped directories (None) and empty dataframes
832
+ valid_results = [
833
+ res for res in parallel_results if res is not None and not res.empty
834
+ ]
795
835
 
796
- if isinstance(df.index, pd.MultiIndex):
797
- df = df.droplevel(0)
836
+ # Stitch all the newly created columns back onto the main dataframe
837
+ if valid_results:
838
+ df = pd.concat([df] + valid_results, axis=1)
839
+
840
+ # Ensure no original input columns were dropped
798
841
  for col in input_df.columns.values:
799
- if col in df.columns.values:
800
- continue
801
- df[col] = input_df[col]
842
+ if col not in df.columns.values:
843
+ df[col] = input_df[col]
802
844
 
803
845
  return df
804
846
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wavetrainer
3
- Version: 0.3.16
3
+ Version: 0.3.18
4
4
  Summary: A library for automatically finding the optimal model within feature and hyperparameter space.
5
5
  Home-page: https://github.com/8W9aG/wavetrainer
6
6
  Author: Will Sackfield
File without changes
File without changes
File without changes
File without changes