upgini 1.2.134__tar.gz → 1.2.135a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.134 → upgini-1.2.135a2}/PKG-INFO +1 -1
- upgini-1.2.135a2/src/upgini/__about__.py +1 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/features_enricher.py +51 -28
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/datetime_utils.py +2 -3
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/features_validator.py +5 -3
- upgini-1.2.134/src/upgini/__about__.py +0 -1
- {upgini-1.2.134 → upgini-1.2.135a2}/.gitignore +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/LICENSE +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/README.md +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/pyproject.toml +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/dataset.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/errors.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/http.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/metadata.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/metrics.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/search_task.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/spinner.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.135a2"
|
|
@@ -751,7 +751,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
751
751
|
exclude_features_sources: list[str] | None = None,
|
|
752
752
|
keep_input: bool = True,
|
|
753
753
|
trace_id: str | None = None,
|
|
754
|
-
metrics_calculation: bool = False,
|
|
755
754
|
silent_mode=False,
|
|
756
755
|
progress_bar: ProgressBar | None = None,
|
|
757
756
|
progress_callback: Callable[[SearchProgress], Any] | None = None,
|
|
@@ -810,11 +809,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
810
809
|
X,
|
|
811
810
|
y=y,
|
|
812
811
|
exclude_features_sources=exclude_features_sources,
|
|
813
|
-
metrics_calculation=metrics_calculation,
|
|
814
812
|
silent_mode=silent_mode,
|
|
815
813
|
progress_bar=progress_bar,
|
|
816
814
|
keep_input=keep_input,
|
|
817
815
|
)
|
|
816
|
+
if TARGET in result.columns:
|
|
817
|
+
result.drop(columns=TARGET, inplace=True)
|
|
818
818
|
self.logger.info("Transform finished successfully")
|
|
819
819
|
search_progress = SearchProgress(100.0, ProgressStage.FINISHED)
|
|
820
820
|
if progress_bar is not None:
|
|
@@ -1047,7 +1047,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1047
1047
|
with Spinner():
|
|
1048
1048
|
self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
|
|
1049
1049
|
|
|
1050
|
-
|
|
1050
|
+
date_col = self._get_date_column(search_keys)
|
|
1051
|
+
has_date = date_col is not None and date_col in validated_X.columns
|
|
1051
1052
|
model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
|
|
1052
1053
|
cat_features = list(set(client_cat_features + cat_features_from_backend))
|
|
1053
1054
|
has_time = has_date and isinstance(_cv, TimeSeriesSplit) or isinstance(_cv, BlockedTimeSeriesSplit)
|
|
@@ -1323,7 +1324,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1323
1324
|
search_keys = {str(k): v for k, v in search_keys.items()}
|
|
1324
1325
|
|
|
1325
1326
|
date_column = self._get_date_column(search_keys)
|
|
1326
|
-
has_date = date_column is not None
|
|
1327
|
+
has_date = date_column is not None and date_column in validated_X.columns
|
|
1327
1328
|
if not has_date:
|
|
1328
1329
|
self.logger.info("No date column for OOT PSI calculation")
|
|
1329
1330
|
return
|
|
@@ -1637,7 +1638,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1637
1638
|
|
|
1638
1639
|
if not isinstance(_cv, BaseCrossValidator):
|
|
1639
1640
|
date_column = self._get_date_column(search_keys)
|
|
1640
|
-
date_series = X[date_column] if date_column is not None else None
|
|
1641
|
+
date_series = X[date_column] if date_column is not None and date_column in X.columns else None
|
|
1641
1642
|
_cv, groups = CVConfig(
|
|
1642
1643
|
_cv, date_series, self.random_state, self._search_task.get_shuffle_kfold(), group_columns=group_columns
|
|
1643
1644
|
).get_cv_and_groups(X)
|
|
@@ -1738,7 +1739,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1738
1739
|
|
|
1739
1740
|
client_features = [
|
|
1740
1741
|
c
|
|
1741
|
-
for c in
|
|
1742
|
+
for c in validated_X.columns.to_list()
|
|
1742
1743
|
if (not self.fit_select_features or c in set(self.feature_names_).union(self.id_columns or []))
|
|
1743
1744
|
and c
|
|
1744
1745
|
not in (
|
|
@@ -1747,6 +1748,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1747
1748
|
+ [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1748
1749
|
)
|
|
1749
1750
|
]
|
|
1751
|
+
client_features.extend(f for f in generated_features if f in self.feature_names_)
|
|
1750
1752
|
if self.baseline_score_column is not None and self.baseline_score_column not in client_features:
|
|
1751
1753
|
client_features.append(self.baseline_score_column)
|
|
1752
1754
|
self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
|
|
@@ -1847,7 +1849,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1847
1849
|
enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
|
|
1848
1850
|
enriched_eval_X, eval_y_sampled, self.cv
|
|
1849
1851
|
)
|
|
1850
|
-
if date_column is not None:
|
|
1852
|
+
if date_column is not None and date_column in eval_X_sorted.columns:
|
|
1851
1853
|
eval_set_dates[idx] = eval_X_sorted[date_column]
|
|
1852
1854
|
fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
|
|
1853
1855
|
fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
|
|
@@ -1936,7 +1938,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1936
1938
|
and self.df_with_original_index is not None
|
|
1937
1939
|
):
|
|
1938
1940
|
self.logger.info("Dataset is not imbalanced, so use enriched_X from fit")
|
|
1939
|
-
return self.__get_enriched_from_fit(
|
|
1941
|
+
return self.__get_enriched_from_fit(
|
|
1942
|
+
validated_X, validated_y, eval_set, trace_id, remove_outliers_calc_metrics
|
|
1943
|
+
)
|
|
1940
1944
|
else:
|
|
1941
1945
|
self.logger.info(
|
|
1942
1946
|
"Dataset is imbalanced or exclude_features_sources or X was passed or this is saved search."
|
|
@@ -2074,6 +2078,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2074
2078
|
|
|
2075
2079
|
def __get_enriched_from_fit(
|
|
2076
2080
|
self,
|
|
2081
|
+
validated_X: pd.DataFrame,
|
|
2082
|
+
validated_y: pd.Series,
|
|
2077
2083
|
eval_set: list[tuple] | None,
|
|
2078
2084
|
trace_id: str,
|
|
2079
2085
|
remove_outliers_calc_metrics: bool | None,
|
|
@@ -2082,7 +2088,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2082
2088
|
search_keys = self.fit_search_keys.copy()
|
|
2083
2089
|
|
|
2084
2090
|
rows_to_drop = None
|
|
2085
|
-
|
|
2091
|
+
date_column = self._get_date_column(search_keys)
|
|
2092
|
+
has_date = date_column is not None and date_column in validated_X.columns
|
|
2086
2093
|
self.model_task_type = self.model_task_type or define_task(
|
|
2087
2094
|
self.df_with_original_index[TARGET], has_date, self.logger, silent=True
|
|
2088
2095
|
)
|
|
@@ -2124,6 +2131,24 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2124
2131
|
drop_system_record_id=False,
|
|
2125
2132
|
)
|
|
2126
2133
|
|
|
2134
|
+
enriched_Xy.rename(columns=self.fit_columns_renaming, inplace=True)
|
|
2135
|
+
search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
|
|
2136
|
+
generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
|
|
2137
|
+
|
|
2138
|
+
validated_Xy = validated_X.copy()
|
|
2139
|
+
validated_Xy[TARGET] = validated_y
|
|
2140
|
+
|
|
2141
|
+
selecting_columns = self._selecting_input_and_generated_columns(
|
|
2142
|
+
validated_Xy, self.fit_generated_features, keep_input=True, trace_id=trace_id
|
|
2143
|
+
)
|
|
2144
|
+
selecting_columns.extend(
|
|
2145
|
+
c
|
|
2146
|
+
for c in enriched_Xy.columns
|
|
2147
|
+
if (c in self.feature_names_ and c not in selecting_columns and c not in validated_X.columns)
|
|
2148
|
+
or c in [EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SYSTEM_RECORD_ID]
|
|
2149
|
+
)
|
|
2150
|
+
enriched_Xy = enriched_Xy[selecting_columns]
|
|
2151
|
+
|
|
2127
2152
|
# Handle eval sets extraction based on EVAL_SET_INDEX
|
|
2128
2153
|
if EVAL_SET_INDEX in enriched_Xy.columns:
|
|
2129
2154
|
eval_set_indices = list(enriched_Xy[EVAL_SET_INDEX].unique())
|
|
@@ -2135,7 +2160,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2135
2160
|
].copy()
|
|
2136
2161
|
enriched_Xy = enriched_Xy.loc[enriched_Xy[EVAL_SET_INDEX] == 0].copy()
|
|
2137
2162
|
|
|
2138
|
-
x_columns = [
|
|
2163
|
+
x_columns = [
|
|
2164
|
+
c
|
|
2165
|
+
for c in [self.fit_columns_renaming.get(k, k) for k in self.df_with_original_index.columns]
|
|
2166
|
+
if c not in [EVAL_SET_INDEX, TARGET] and c in selecting_columns
|
|
2167
|
+
]
|
|
2139
2168
|
X_sampled = enriched_Xy[x_columns].copy()
|
|
2140
2169
|
y_sampled = enriched_Xy[TARGET].copy()
|
|
2141
2170
|
enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
|
|
@@ -2157,15 +2186,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2157
2186
|
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
|
2158
2187
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
|
2159
2188
|
|
|
2160
|
-
# reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
|
2161
|
-
X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
|
|
2162
|
-
enriched_X.rename(columns=self.fit_columns_renaming, inplace=True)
|
|
2163
|
-
for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
|
|
2164
|
-
eval_X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
|
|
2165
|
-
enriched_eval_X.rename(columns=self.fit_columns_renaming, inplace=True)
|
|
2166
|
-
search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
|
|
2167
|
-
generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
|
|
2168
|
-
|
|
2169
2189
|
datasets_hash = hash_input(self.X, self.y, self.eval_set)
|
|
2170
2190
|
return self.__cache_and_return_results(
|
|
2171
2191
|
datasets_hash,
|
|
@@ -2642,7 +2662,7 @@ if response.status_code == 200:
|
|
|
2642
2662
|
generated_features = [columns_renaming.get(c, c) for c in generated_features]
|
|
2643
2663
|
search_keys = {columns_renaming.get(c, c): t for c, t in search_keys.items()}
|
|
2644
2664
|
selecting_columns = self._selecting_input_and_generated_columns(
|
|
2645
|
-
validated_Xy, generated_features, keep_input, trace_id
|
|
2665
|
+
validated_Xy, generated_features, keep_input, trace_id, is_transform=True
|
|
2646
2666
|
)
|
|
2647
2667
|
self.logger.warning(f"Filtered columns by existance in dataframe: {selecting_columns}")
|
|
2648
2668
|
if add_fit_system_record_id:
|
|
@@ -2895,7 +2915,7 @@ if response.status_code == 200:
|
|
|
2895
2915
|
)
|
|
2896
2916
|
|
|
2897
2917
|
selecting_columns = self._selecting_input_and_generated_columns(
|
|
2898
|
-
validated_Xy, generated_features, keep_input, trace_id
|
|
2918
|
+
validated_Xy, generated_features, keep_input, trace_id, is_transform=True
|
|
2899
2919
|
)
|
|
2900
2920
|
selecting_columns.extend(
|
|
2901
2921
|
c
|
|
@@ -2933,20 +2953,22 @@ if response.status_code == 200:
|
|
|
2933
2953
|
generated_features: list[str],
|
|
2934
2954
|
keep_input: bool,
|
|
2935
2955
|
trace_id: str,
|
|
2956
|
+
is_transform: bool = False,
|
|
2936
2957
|
):
|
|
2937
2958
|
fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
|
|
2941
|
-
c for c in generated_features if c in self.feature_names_
|
|
2959
|
+
original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_dropped_features]
|
|
2960
|
+
new_columns_on_transform = [
|
|
2961
|
+
c for c in validated_Xy.columns if c not in fit_input_columns and c not in original_dropped_features
|
|
2942
2962
|
]
|
|
2963
|
+
|
|
2964
|
+
selected_generated_features = [c for c in generated_features if c in self.feature_names_]
|
|
2943
2965
|
if keep_input is True:
|
|
2944
2966
|
selected_input_columns = [
|
|
2945
2967
|
c
|
|
2946
2968
|
for c in validated_Xy.columns
|
|
2947
2969
|
if not self.fit_select_features
|
|
2948
2970
|
or c in self.feature_names_
|
|
2949
|
-
or c in new_columns_on_transform
|
|
2971
|
+
or (c in new_columns_on_transform and is_transform)
|
|
2950
2972
|
or c in self.search_keys
|
|
2951
2973
|
or c in (self.id_columns or [])
|
|
2952
2974
|
or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
|
|
@@ -3112,7 +3134,7 @@ if response.status_code == 200:
|
|
|
3112
3134
|
self.fit_search_keys = self.__prepare_search_keys(df, self.fit_search_keys, is_demo_dataset)
|
|
3113
3135
|
|
|
3114
3136
|
maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
|
|
3115
|
-
has_date = maybe_date_column is not None
|
|
3137
|
+
has_date = maybe_date_column is not None and maybe_date_column in validated_X.columns
|
|
3116
3138
|
|
|
3117
3139
|
self.model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
3118
3140
|
|
|
@@ -3746,7 +3768,8 @@ if response.status_code == 200:
|
|
|
3746
3768
|
if eval_set is None:
|
|
3747
3769
|
return None
|
|
3748
3770
|
validated_eval_set = []
|
|
3749
|
-
|
|
3771
|
+
date_col = self._get_date_column(self.search_keys)
|
|
3772
|
+
has_date = date_col is not None and date_col in X.columns
|
|
3750
3773
|
for idx, eval_pair in enumerate(eval_set):
|
|
3751
3774
|
validated_pair = self._validate_eval_set_pair(X, eval_pair)
|
|
3752
3775
|
if validated_pair[1].isna().all():
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import logging
|
|
3
|
-
import re
|
|
4
3
|
from typing import Dict, List, Optional
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
@@ -67,7 +66,7 @@ class DateTimeConverter:
|
|
|
67
66
|
try:
|
|
68
67
|
if s is None or len(str(s).strip()) == 0:
|
|
69
68
|
return None
|
|
70
|
-
if
|
|
69
|
+
if sum(ch.isdigit() for ch in str(s)) < 6:
|
|
71
70
|
return None
|
|
72
71
|
return s
|
|
73
72
|
except Exception:
|
|
@@ -116,7 +115,7 @@ class DateTimeConverter:
|
|
|
116
115
|
else:
|
|
117
116
|
return None
|
|
118
117
|
else:
|
|
119
|
-
date_col = date_col.astype("string")
|
|
118
|
+
date_col = date_col.astype("string").apply(self.clean_date)
|
|
120
119
|
parsed_datetime = self.parse_string_date(date_col.to_frame(self.date_column), raise_errors)
|
|
121
120
|
if parsed_datetime.isna().all():
|
|
122
121
|
raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
|
|
@@ -44,12 +44,14 @@ class FeaturesValidator:
|
|
|
44
44
|
else:
|
|
45
45
|
empty_or_constant_features.append(f)
|
|
46
46
|
|
|
47
|
+
columns_renaming = columns_renaming or {}
|
|
48
|
+
|
|
47
49
|
if one_hot_encoded_features:
|
|
48
|
-
msg = bundle.get("one_hot_encoded_features").format(
|
|
50
|
+
msg = bundle.get("one_hot_encoded_features").format(
|
|
51
|
+
[columns_renaming.get(f, f) for f in one_hot_encoded_features]
|
|
52
|
+
)
|
|
49
53
|
warnings.append(msg)
|
|
50
54
|
|
|
51
|
-
columns_renaming = columns_renaming or {}
|
|
52
|
-
|
|
53
55
|
if empty_or_constant_features:
|
|
54
56
|
msg = bundle.get("empty_or_contant_features").format(
|
|
55
57
|
[columns_renaming.get(f, f) for f in empty_or_constant_features]
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.134"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|