upgini 1.2.134__tar.gz → 1.2.135a2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (82)
  1. {upgini-1.2.134 → upgini-1.2.135a2}/PKG-INFO +1 -1
  2. upgini-1.2.135a2/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/features_enricher.py +51 -28
  4. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/datetime_utils.py +2 -3
  5. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/features_validator.py +5 -3
  6. upgini-1.2.134/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.134 → upgini-1.2.135a2}/.gitignore +0 -0
  8. {upgini-1.2.134 → upgini-1.2.135a2}/LICENSE +0 -0
  9. {upgini-1.2.134 → upgini-1.2.135a2}/README.md +0 -0
  10. {upgini-1.2.134 → upgini-1.2.135a2}/pyproject.toml +0 -0
  11. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/all_operators.py +0 -0
  17. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/operator.py +0 -0
  22. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/__init__.py +0 -0
  23. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/base.py +0 -0
  24. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/cross.py +0 -0
  25. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/delta.py +0 -0
  26. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/lag.py +0 -0
  27. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/roll.py +0 -0
  28. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/trend.py +0 -0
  29. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/timeseries/volatility.py +0 -0
  30. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/unary.py +0 -0
  31. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/utils.py +0 -0
  32. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/autofe/vector.py +0 -0
  33. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/data_source/__init__.py +0 -0
  34. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/data_source/data_source_publisher.py +0 -0
  35. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/dataset.py +0 -0
  36. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/errors.py +0 -0
  37. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/http.py +0 -0
  38. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/mdc/__init__.py +0 -0
  39. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/mdc/context.py +0 -0
  40. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/metadata.py +0 -0
  41. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/metrics.py +0 -0
  42. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/normalizer/__init__.py +0 -0
  43. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/normalizer/normalize_utils.py +0 -0
  44. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/__init__.py +0 -0
  45. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/exceptions.py +0 -0
  46. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/strings.properties +0 -0
  47. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  48. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/__init__.py +0 -0
  49. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/base.py +0 -0
  50. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/random_under_sampler.py +0 -0
  51. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/sampler/utils.py +0 -0
  52. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/search_task.py +0 -0
  53. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/spinner.py +0 -0
  54. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  55. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/__init__.py +0 -0
  56. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/base_search_key_detector.py +0 -0
  57. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/config.py +0 -0
  59. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/country_utils.py +0 -0
  60. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/custom_loss_utils.py +0 -0
  61. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/cv_utils.py +0 -0
  62. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/display_utils.py +0 -0
  64. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/feature_info.py +0 -0
  67. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.134 → upgini-1.2.135a2}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.134
3
+ Version: 1.2.135a2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.135a2"
@@ -751,7 +751,6 @@ class FeaturesEnricher(TransformerMixin):
751
751
  exclude_features_sources: list[str] | None = None,
752
752
  keep_input: bool = True,
753
753
  trace_id: str | None = None,
754
- metrics_calculation: bool = False,
755
754
  silent_mode=False,
756
755
  progress_bar: ProgressBar | None = None,
757
756
  progress_callback: Callable[[SearchProgress], Any] | None = None,
@@ -810,11 +809,12 @@ class FeaturesEnricher(TransformerMixin):
810
809
  X,
811
810
  y=y,
812
811
  exclude_features_sources=exclude_features_sources,
813
- metrics_calculation=metrics_calculation,
814
812
  silent_mode=silent_mode,
815
813
  progress_bar=progress_bar,
816
814
  keep_input=keep_input,
817
815
  )
816
+ if TARGET in result.columns:
817
+ result.drop(columns=TARGET, inplace=True)
818
818
  self.logger.info("Transform finished successfully")
819
819
  search_progress = SearchProgress(100.0, ProgressStage.FINISHED)
820
820
  if progress_bar is not None:
@@ -1047,7 +1047,8 @@ class FeaturesEnricher(TransformerMixin):
1047
1047
  with Spinner():
1048
1048
  self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
1049
1049
 
1050
- has_date = self._get_date_column(search_keys) is not None
1050
+ date_col = self._get_date_column(search_keys)
1051
+ has_date = date_col is not None and date_col in validated_X.columns
1051
1052
  model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
1052
1053
  cat_features = list(set(client_cat_features + cat_features_from_backend))
1053
1054
  has_time = has_date and isinstance(_cv, TimeSeriesSplit) or isinstance(_cv, BlockedTimeSeriesSplit)
@@ -1323,7 +1324,7 @@ class FeaturesEnricher(TransformerMixin):
1323
1324
  search_keys = {str(k): v for k, v in search_keys.items()}
1324
1325
 
1325
1326
  date_column = self._get_date_column(search_keys)
1326
- has_date = date_column is not None
1327
+ has_date = date_column is not None and date_column in validated_X.columns
1327
1328
  if not has_date:
1328
1329
  self.logger.info("No date column for OOT PSI calculation")
1329
1330
  return
@@ -1637,7 +1638,7 @@ class FeaturesEnricher(TransformerMixin):
1637
1638
 
1638
1639
  if not isinstance(_cv, BaseCrossValidator):
1639
1640
  date_column = self._get_date_column(search_keys)
1640
- date_series = X[date_column] if date_column is not None else None
1641
+ date_series = X[date_column] if date_column is not None and date_column in X.columns else None
1641
1642
  _cv, groups = CVConfig(
1642
1643
  _cv, date_series, self.random_state, self._search_task.get_shuffle_kfold(), group_columns=group_columns
1643
1644
  ).get_cv_and_groups(X)
@@ -1738,7 +1739,7 @@ class FeaturesEnricher(TransformerMixin):
1738
1739
 
1739
1740
  client_features = [
1740
1741
  c
1741
- for c in (validated_X.columns.to_list() + generated_features)
1742
+ for c in validated_X.columns.to_list()
1742
1743
  if (not self.fit_select_features or c in set(self.feature_names_).union(self.id_columns or []))
1743
1744
  and c
1744
1745
  not in (
@@ -1747,6 +1748,7 @@ class FeaturesEnricher(TransformerMixin):
1747
1748
  + [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1748
1749
  )
1749
1750
  ]
1751
+ client_features.extend(f for f in generated_features if f in self.feature_names_)
1750
1752
  if self.baseline_score_column is not None and self.baseline_score_column not in client_features:
1751
1753
  client_features.append(self.baseline_score_column)
1752
1754
  self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
@@ -1847,7 +1849,7 @@ class FeaturesEnricher(TransformerMixin):
1847
1849
  enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
1848
1850
  enriched_eval_X, eval_y_sampled, self.cv
1849
1851
  )
1850
- if date_column is not None:
1852
+ if date_column is not None and date_column in eval_X_sorted.columns:
1851
1853
  eval_set_dates[idx] = eval_X_sorted[date_column]
1852
1854
  fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
1853
1855
  fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
@@ -1936,7 +1938,9 @@ class FeaturesEnricher(TransformerMixin):
1936
1938
  and self.df_with_original_index is not None
1937
1939
  ):
1938
1940
  self.logger.info("Dataset is not imbalanced, so use enriched_X from fit")
1939
- return self.__get_enriched_from_fit(eval_set, trace_id, remove_outliers_calc_metrics)
1941
+ return self.__get_enriched_from_fit(
1942
+ validated_X, validated_y, eval_set, trace_id, remove_outliers_calc_metrics
1943
+ )
1940
1944
  else:
1941
1945
  self.logger.info(
1942
1946
  "Dataset is imbalanced or exclude_features_sources or X was passed or this is saved search."
@@ -2074,6 +2078,8 @@ class FeaturesEnricher(TransformerMixin):
2074
2078
 
2075
2079
  def __get_enriched_from_fit(
2076
2080
  self,
2081
+ validated_X: pd.DataFrame,
2082
+ validated_y: pd.Series,
2077
2083
  eval_set: list[tuple] | None,
2078
2084
  trace_id: str,
2079
2085
  remove_outliers_calc_metrics: bool | None,
@@ -2082,7 +2088,8 @@ class FeaturesEnricher(TransformerMixin):
2082
2088
  search_keys = self.fit_search_keys.copy()
2083
2089
 
2084
2090
  rows_to_drop = None
2085
- has_date = self._get_date_column(search_keys) is not None
2091
+ date_column = self._get_date_column(search_keys)
2092
+ has_date = date_column is not None and date_column in validated_X.columns
2086
2093
  self.model_task_type = self.model_task_type or define_task(
2087
2094
  self.df_with_original_index[TARGET], has_date, self.logger, silent=True
2088
2095
  )
@@ -2124,6 +2131,24 @@ class FeaturesEnricher(TransformerMixin):
2124
2131
  drop_system_record_id=False,
2125
2132
  )
2126
2133
 
2134
+ enriched_Xy.rename(columns=self.fit_columns_renaming, inplace=True)
2135
+ search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
2136
+ generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
2137
+
2138
+ validated_Xy = validated_X.copy()
2139
+ validated_Xy[TARGET] = validated_y
2140
+
2141
+ selecting_columns = self._selecting_input_and_generated_columns(
2142
+ validated_Xy, self.fit_generated_features, keep_input=True, trace_id=trace_id
2143
+ )
2144
+ selecting_columns.extend(
2145
+ c
2146
+ for c in enriched_Xy.columns
2147
+ if (c in self.feature_names_ and c not in selecting_columns and c not in validated_X.columns)
2148
+ or c in [EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SYSTEM_RECORD_ID]
2149
+ )
2150
+ enriched_Xy = enriched_Xy[selecting_columns]
2151
+
2127
2152
  # Handle eval sets extraction based on EVAL_SET_INDEX
2128
2153
  if EVAL_SET_INDEX in enriched_Xy.columns:
2129
2154
  eval_set_indices = list(enriched_Xy[EVAL_SET_INDEX].unique())
@@ -2135,7 +2160,11 @@ class FeaturesEnricher(TransformerMixin):
2135
2160
  ].copy()
2136
2161
  enriched_Xy = enriched_Xy.loc[enriched_Xy[EVAL_SET_INDEX] == 0].copy()
2137
2162
 
2138
- x_columns = [c for c in self.df_with_original_index.columns if c not in [EVAL_SET_INDEX, TARGET]]
2163
+ x_columns = [
2164
+ c
2165
+ for c in [self.fit_columns_renaming.get(k, k) for k in self.df_with_original_index.columns]
2166
+ if c not in [EVAL_SET_INDEX, TARGET] and c in selecting_columns
2167
+ ]
2139
2168
  X_sampled = enriched_Xy[x_columns].copy()
2140
2169
  y_sampled = enriched_Xy[TARGET].copy()
2141
2170
  enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
@@ -2157,15 +2186,6 @@ class FeaturesEnricher(TransformerMixin):
2157
2186
  enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
2158
2187
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
2159
2188
 
2160
- # reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2161
- X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
2162
- enriched_X.rename(columns=self.fit_columns_renaming, inplace=True)
2163
- for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
2164
- eval_X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
2165
- enriched_eval_X.rename(columns=self.fit_columns_renaming, inplace=True)
2166
- search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
2167
- generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
2168
-
2169
2189
  datasets_hash = hash_input(self.X, self.y, self.eval_set)
2170
2190
  return self.__cache_and_return_results(
2171
2191
  datasets_hash,
@@ -2642,7 +2662,7 @@ if response.status_code == 200:
2642
2662
  generated_features = [columns_renaming.get(c, c) for c in generated_features]
2643
2663
  search_keys = {columns_renaming.get(c, c): t for c, t in search_keys.items()}
2644
2664
  selecting_columns = self._selecting_input_and_generated_columns(
2645
- validated_Xy, generated_features, keep_input, trace_id
2665
+ validated_Xy, generated_features, keep_input, trace_id, is_transform=True
2646
2666
  )
2647
2667
  self.logger.warning(f"Filtered columns by existance in dataframe: {selecting_columns}")
2648
2668
  if add_fit_system_record_id:
@@ -2895,7 +2915,7 @@ if response.status_code == 200:
2895
2915
  )
2896
2916
 
2897
2917
  selecting_columns = self._selecting_input_and_generated_columns(
2898
- validated_Xy, generated_features, keep_input, trace_id
2918
+ validated_Xy, generated_features, keep_input, trace_id, is_transform=True
2899
2919
  )
2900
2920
  selecting_columns.extend(
2901
2921
  c
@@ -2933,20 +2953,22 @@ if response.status_code == 200:
2933
2953
  generated_features: list[str],
2934
2954
  keep_input: bool,
2935
2955
  trace_id: str,
2956
+ is_transform: bool = False,
2936
2957
  ):
2937
2958
  fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2938
- new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2939
-
2940
- selected_generated_features = [
2941
- c for c in generated_features if c in self.feature_names_
2959
+ original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_dropped_features]
2960
+ new_columns_on_transform = [
2961
+ c for c in validated_Xy.columns if c not in fit_input_columns and c not in original_dropped_features
2942
2962
  ]
2963
+
2964
+ selected_generated_features = [c for c in generated_features if c in self.feature_names_]
2943
2965
  if keep_input is True:
2944
2966
  selected_input_columns = [
2945
2967
  c
2946
2968
  for c in validated_Xy.columns
2947
2969
  if not self.fit_select_features
2948
2970
  or c in self.feature_names_
2949
- or c in new_columns_on_transform
2971
+ or (c in new_columns_on_transform and is_transform)
2950
2972
  or c in self.search_keys
2951
2973
  or c in (self.id_columns or [])
2952
2974
  or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
@@ -3112,7 +3134,7 @@ if response.status_code == 200:
3112
3134
  self.fit_search_keys = self.__prepare_search_keys(df, self.fit_search_keys, is_demo_dataset)
3113
3135
 
3114
3136
  maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
3115
- has_date = maybe_date_column is not None
3137
+ has_date = maybe_date_column is not None and maybe_date_column in validated_X.columns
3116
3138
 
3117
3139
  self.model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
3118
3140
 
@@ -3746,7 +3768,8 @@ if response.status_code == 200:
3746
3768
  if eval_set is None:
3747
3769
  return None
3748
3770
  validated_eval_set = []
3749
- has_date = self._get_date_column(self.search_keys) is not None
3771
+ date_col = self._get_date_column(self.search_keys)
3772
+ has_date = date_col is not None and date_col in X.columns
3750
3773
  for idx, eval_pair in enumerate(eval_set):
3751
3774
  validated_pair = self._validate_eval_set_pair(X, eval_pair)
3752
3775
  if validated_pair[1].isna().all():
@@ -1,6 +1,5 @@
1
1
  import datetime
2
2
  import logging
3
- import re
4
3
  from typing import Dict, List, Optional
5
4
 
6
5
  import numpy as np
@@ -67,7 +66,7 @@ class DateTimeConverter:
67
66
  try:
68
67
  if s is None or len(str(s).strip()) == 0:
69
68
  return None
70
- if not re.match(DATETIME_PATTERN, str(s)):
69
+ if sum(ch.isdigit() for ch in str(s)) < 6:
71
70
  return None
72
71
  return s
73
72
  except Exception:
@@ -116,7 +115,7 @@ class DateTimeConverter:
116
115
  else:
117
116
  return None
118
117
  else:
119
- date_col = date_col.astype("string") # .apply(self.clean_date)
118
+ date_col = date_col.astype("string").apply(self.clean_date)
120
119
  parsed_datetime = self.parse_string_date(date_col.to_frame(self.date_column), raise_errors)
121
120
  if parsed_datetime.isna().all():
122
121
  raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
@@ -44,12 +44,14 @@ class FeaturesValidator:
44
44
  else:
45
45
  empty_or_constant_features.append(f)
46
46
 
47
+ columns_renaming = columns_renaming or {}
48
+
47
49
  if one_hot_encoded_features:
48
- msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
50
+ msg = bundle.get("one_hot_encoded_features").format(
51
+ [columns_renaming.get(f, f) for f in one_hot_encoded_features]
52
+ )
49
53
  warnings.append(msg)
50
54
 
51
- columns_renaming = columns_renaming or {}
52
-
53
55
  if empty_or_constant_features:
54
56
  msg = bundle.get("empty_or_contant_features").format(
55
57
  [columns_renaming.get(f, f) for f in empty_or_constant_features]
@@ -1 +0,0 @@
1
- __version__ = "1.2.134"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes