wavetrainer 0.3.16__tar.gz → 0.3.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavetrainer-0.3.16/wavetrainer.egg-info → wavetrainer-0.3.18}/PKG-INFO +1 -1
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/setup.py +1 -1
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/__init__.py +1 -1
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_model.py +1 -1
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/trainer.py +67 -25
- {wavetrainer-0.3.16 → wavetrainer-0.3.18/wavetrainer.egg-info}/PKG-INFO +1 -1
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/LICENSE +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/MANIFEST.in +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/README.md +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/requirements.txt +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/setup.cfg +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/model/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/model/catboost_kwargs_test.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/tests/trainer_test.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/calibrator.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/calibrator_router.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/conformal_quantile_calibration.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/vennabers_calibrator.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/create.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/crps.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/exceptions.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/fit.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_classifier_wrap.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_kwargs.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_regressor_wrap.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/lightgbm/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/lightgbm/lightgbm_model.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/model.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/model_router.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/tabpfn/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/tabpfn/tabpfn_model.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/early_stopper.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/xgboost_logger.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/xgboost/xgboost_model.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model_type.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/combined_normaliser.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/normaliser.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/powertransformer_normaliser.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/params.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/base_selector_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/combined_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/constant_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/correlation_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/duplicate_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/fast_correlation_based_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/non_categorical_numeric_columns.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/nonnumeric_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/pca_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/select_by_single_feature_performance_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/smart_correlation_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/unseen_reducer.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/selector/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/selector/selector.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/stratified_brier_score_loss.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/threshold_callback.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/class_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/combined_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/exponential_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/linear_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/noop_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/sigmoid_weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/weights.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/weights/weights_router.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/windower/__init__.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/windower/windower.py +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/SOURCES.txt +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/dependency_links.txt +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/not-zip-safe +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/requires.txt +0 -0
- {wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer.egg-info/top_level.txt +0 -0
|
@@ -23,7 +23,7 @@ def install_requires() -> typing.List[str]:
|
|
|
23
23
|
|
|
24
24
|
setup(
|
|
25
25
|
name='wavetrainer',
|
|
26
|
-
version='0.3.16',
|
|
26
|
+
version='0.3.18',
|
|
27
27
|
description='A library for automatically finding the optimal model within feature and hyperparameter space.',
|
|
28
28
|
long_description=long_description,
|
|
29
29
|
long_description_content_type='text/markdown',
|
|
@@ -269,7 +269,7 @@ class CatboostModel(Model):
|
|
|
269
269
|
cat_features=df.select_dtypes(include="category").columns.tolist(),
|
|
270
270
|
)
|
|
271
271
|
catboost = self._provide_catboost()
|
|
272
|
-
pred = catboost.predict(pred_pool)
|
|
272
|
+
pred = catboost.predict(pred_pool, thread_count=1)
|
|
273
273
|
if self._model_type == ModelType.QUANTILE_REGRESSION:
|
|
274
274
|
df = pd.DataFrame(
|
|
275
275
|
index=df.index,
|
|
@@ -14,6 +14,7 @@ import numpy as np
|
|
|
14
14
|
import optuna
|
|
15
15
|
import pandas as pd
|
|
16
16
|
import tqdm
|
|
17
|
+
from joblib import Parallel, delayed # type: ignore
|
|
17
18
|
from sklearn.metrics import f1_score # type: ignore
|
|
18
19
|
from sklearn.metrics import (accuracy_score, brier_score_loss, log_loss,
|
|
19
20
|
precision_score, r2_score, recall_score)
|
|
@@ -676,10 +677,13 @@ class Trainer(Fit):
|
|
|
676
677
|
return self
|
|
677
678
|
|
|
678
679
|
def transform(
|
|
679
|
-
self,
|
|
680
|
+
self,
|
|
681
|
+
df: pd.DataFrame,
|
|
682
|
+
optimistic: bool = False,
|
|
683
|
+
ignore_no_dates: bool = False,
|
|
684
|
+
n_jobs: int = -1,
|
|
680
685
|
) -> pd.DataFrame:
|
|
681
|
-
"""Predict the expected values of the data."""
|
|
682
|
-
# tqdm.tqdm.pandas(desc="Inferring...")
|
|
686
|
+
"""Predict the expected values of the data running columns in parallel."""
|
|
683
687
|
input_df = df.copy()
|
|
684
688
|
df = df.reindex(sorted(df.columns), axis=1)
|
|
685
689
|
feature_columns = df.columns.values
|
|
@@ -689,22 +693,24 @@ class Trainer(Fit):
|
|
|
689
693
|
else pd.DatetimeIndex(pd.to_datetime(df[self._dt_column]))
|
|
690
694
|
)
|
|
691
695
|
|
|
692
|
-
|
|
696
|
+
# --- INNER FUNCTION 1: Process a single column directory ---
|
|
697
|
+
def process_single_column(column: str):
|
|
693
698
|
column_path = os.path.join(self._folder, column)
|
|
694
699
|
if not os.path.isdir(column_path):
|
|
695
|
-
|
|
700
|
+
return None
|
|
701
|
+
|
|
696
702
|
dates = []
|
|
697
703
|
for date_str in os.listdir(column_path):
|
|
698
704
|
date_path = os.path.join(column_path, date_str)
|
|
699
|
-
if not os.path.isdir(date_path):
|
|
700
|
-
continue
|
|
701
|
-
if not os.listdir(date_path):
|
|
705
|
+
if not os.path.isdir(date_path) or not os.listdir(date_path):
|
|
702
706
|
continue
|
|
703
707
|
dates.append(datetime.datetime.fromisoformat(date_str))
|
|
708
|
+
|
|
704
709
|
if not dates:
|
|
705
710
|
if ignore_no_dates:
|
|
706
|
-
|
|
711
|
+
return None
|
|
707
712
|
raise ValueError(f"no dates found for {column}.")
|
|
713
|
+
|
|
708
714
|
dates = sorted(dates)
|
|
709
715
|
bins: list[datetime.datetime] = sorted(
|
|
710
716
|
[dt_index.min().to_pydatetime()]
|
|
@@ -712,12 +718,15 @@ class Trainer(Fit):
|
|
|
712
718
|
+ [(dt_index.max() + pd.Timedelta(days=1)).to_pydatetime()]
|
|
713
719
|
)
|
|
714
720
|
|
|
721
|
+
# --- INNER FUNCTION 2: The actual prediction logic ---
|
|
715
722
|
def perform_predictions(
|
|
716
723
|
group: pd.DataFrame,
|
|
717
724
|
column_path: str,
|
|
718
725
|
column: str,
|
|
719
726
|
dates: list[datetime.datetime],
|
|
720
727
|
) -> pd.DataFrame:
|
|
728
|
+
group = group.copy() # Avoid SettingWithCopyWarning in parallel threads
|
|
729
|
+
|
|
721
730
|
group_dt_index = (
|
|
722
731
|
group.index
|
|
723
732
|
if self._dt_column is None
|
|
@@ -740,7 +749,6 @@ class Trainer(Fit):
|
|
|
740
749
|
filtered_dates = [dates[-1]]
|
|
741
750
|
date_str = filtered_dates[-1].isoformat()
|
|
742
751
|
folder = os.path.join(column_path, date_str)
|
|
743
|
-
# print(f"Loading {folder}")
|
|
744
752
|
|
|
745
753
|
try:
|
|
746
754
|
reducer = self._provide_reducer(folder)
|
|
@@ -756,24 +764,27 @@ class Trainer(Fit):
|
|
|
756
764
|
y_pred = calibrator.transform(
|
|
757
765
|
y_pred if calibrator.predictions_as_x(None) else x_pred
|
|
758
766
|
)
|
|
767
|
+
|
|
759
768
|
for new_column in y_pred.columns.values:
|
|
760
769
|
group["_".join([column, new_column])] = y_pred[new_column]
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
print
|
|
764
|
-
|
|
765
|
-
print(f"
|
|
766
|
-
|
|
767
|
-
|
|
770
|
+
|
|
771
|
+
except (FileNotFoundError, AttributeError) as exc:
|
|
772
|
+
# We keep this error print so you don't lose debugging info,
|
|
773
|
+
# but removed the standard progress print to keep tqdm clean.
|
|
774
|
+
print(f"\nModel {folder} failed:\n{str(exc)}")
|
|
775
|
+
if isinstance(exc, AttributeError):
|
|
776
|
+
raise exc
|
|
768
777
|
|
|
769
778
|
return group
|
|
770
779
|
|
|
780
|
+
# Apply the grouped predictions
|
|
771
781
|
old_index = dt_index.copy()
|
|
772
782
|
df_group = df.groupby(
|
|
773
783
|
dt_index.map(functools.partial(_assign_bin, bins=bins))
|
|
774
784
|
)
|
|
785
|
+
|
|
775
786
|
if len(df_group) == 1:
|
|
776
|
-
|
|
787
|
+
df_out = df_group.apply(
|
|
777
788
|
functools.partial(
|
|
778
789
|
perform_predictions,
|
|
779
790
|
column_path=column_path,
|
|
@@ -782,7 +793,8 @@ class Trainer(Fit):
|
|
|
782
793
|
)
|
|
783
794
|
)
|
|
784
795
|
else:
|
|
785
|
-
|
|
796
|
+
# Using standard apply to avoid messy parallel tqdm console output
|
|
797
|
+
df_out = df_group.apply(
|
|
786
798
|
functools.partial(
|
|
787
799
|
perform_predictions,
|
|
788
800
|
column_path=column_path,
|
|
@@ -790,15 +802,45 @@ class Trainer(Fit):
|
|
|
790
802
|
dates=dates,
|
|
791
803
|
)
|
|
792
804
|
)
|
|
805
|
+
|
|
793
806
|
if self._dt_column is None:
|
|
794
|
-
|
|
807
|
+
df_out = df_out.set_index(old_index)
|
|
808
|
+
|
|
809
|
+
if isinstance(df_out.index, pd.MultiIndex):
|
|
810
|
+
df_out = df_out.droplevel(0)
|
|
811
|
+
|
|
812
|
+
# EXTRACT: Only return the new prediction columns to prevent heavy IPC memory overhead
|
|
813
|
+
new_cols = [c for c in df_out.columns if c not in df.columns]
|
|
814
|
+
return df_out[new_cols]
|
|
815
|
+
|
|
816
|
+
# --- PARALLEL EXECUTION ---
|
|
817
|
+
columns_to_process = os.listdir(self._folder)
|
|
818
|
+
|
|
819
|
+
# Wrap the Parallel call in tqdm and set return_as="generator"
|
|
820
|
+
parallel_results = list(
|
|
821
|
+
tqdm.tqdm(
|
|
822
|
+
Parallel(n_jobs=n_jobs, return_as="generator")(
|
|
823
|
+
delayed(process_single_column)(column)
|
|
824
|
+
for column in columns_to_process
|
|
825
|
+
),
|
|
826
|
+
total=len(columns_to_process),
|
|
827
|
+
desc="Inferring Columns",
|
|
828
|
+
)
|
|
829
|
+
)
|
|
830
|
+
|
|
831
|
+
# Filter out skipped directories (None) and empty dataframes
|
|
832
|
+
valid_results = [
|
|
833
|
+
res for res in parallel_results if res is not None and not res.empty
|
|
834
|
+
]
|
|
795
835
|
|
|
796
|
-
|
|
797
|
-
|
|
836
|
+
# Stitch all the newly created columns back onto the main dataframe
|
|
837
|
+
if valid_results:
|
|
838
|
+
df = pd.concat([df] + valid_results, axis=1)
|
|
839
|
+
|
|
840
|
+
# Ensure no original input columns were dropped
|
|
798
841
|
for col in input_df.columns.values:
|
|
799
|
-
if col in df.columns.values:
|
|
800
|
-
|
|
801
|
-
df[col] = input_df[col]
|
|
842
|
+
if col not in df.columns.values:
|
|
843
|
+
df[col] = input_df[col]
|
|
802
844
|
|
|
803
845
|
return df
|
|
804
846
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/calibrator/conformal_quantile_calibration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_classifier_wrap.py
RENAMED
|
File without changes
|
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/model/catboost/catboost_regressor_wrap.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/normaliser/powertransformer_normaliser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/fast_correlation_based_reducer.py
RENAMED
|
File without changes
|
{wavetrainer-0.3.16 → wavetrainer-0.3.18}/wavetrainer/reducer/non_categorical_numeric_columns.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|