upgini 1.2.134__tar.gz → 1.2.135__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {upgini-1.2.134 → upgini-1.2.135}/PKG-INFO +1 -1
  2. upgini-1.2.135/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/dataset.py +4 -0
  4. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/features_enricher.py +60 -30
  5. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/metadata.py +1 -0
  6. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/datetime_utils.py +2 -3
  7. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/features_validator.py +5 -3
  8. upgini-1.2.134/src/upgini/__about__.py +0 -1
  9. {upgini-1.2.134 → upgini-1.2.135}/.gitignore +0 -0
  10. {upgini-1.2.134 → upgini-1.2.135}/LICENSE +0 -0
  11. {upgini-1.2.134 → upgini-1.2.135}/README.md +0 -0
  12. {upgini-1.2.134 → upgini-1.2.135}/pyproject.toml +0 -0
  13. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/__init__.py +0 -0
  14. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/ads.py +0 -0
  15. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/ads_management/__init__.py +0 -0
  16. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/ads_management/ads_manager.py +0 -0
  17. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/__init__.py +0 -0
  18. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/all_operators.py +0 -0
  19. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/binary.py +0 -0
  20. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/date.py +0 -0
  21. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/feature.py +0 -0
  22. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/groupby.py +0 -0
  23. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/operator.py +0 -0
  24. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/__init__.py +0 -0
  25. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/base.py +0 -0
  26. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/cross.py +0 -0
  27. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/delta.py +0 -0
  28. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/lag.py +0 -0
  29. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/roll.py +0 -0
  30. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/trend.py +0 -0
  31. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/timeseries/volatility.py +0 -0
  32. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/unary.py +0 -0
  33. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/utils.py +0 -0
  34. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/autofe/vector.py +0 -0
  35. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/data_source/__init__.py +0 -0
  36. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/data_source/data_source_publisher.py +0 -0
  37. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/errors.py +0 -0
  38. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/http.py +0 -0
  39. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/mdc/__init__.py +0 -0
  40. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/mdc/context.py +0 -0
  41. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/metrics.py +0 -0
  42. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/normalizer/__init__.py +0 -0
  43. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/normalizer/normalize_utils.py +0 -0
  44. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/resource_bundle/__init__.py +0 -0
  45. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/resource_bundle/exceptions.py +0 -0
  46. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/resource_bundle/strings.properties +0 -0
  47. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  48. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/sampler/__init__.py +0 -0
  49. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/sampler/base.py +0 -0
  50. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/sampler/random_under_sampler.py +0 -0
  51. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/sampler/utils.py +0 -0
  52. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/search_task.py +0 -0
  53. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/spinner.py +0 -0
  54. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  55. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/__init__.py +0 -0
  56. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/base_search_key_detector.py +0 -0
  57. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/config.py +0 -0
  59. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/country_utils.py +0 -0
  60. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/custom_loss_utils.py +0 -0
  61. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/cv_utils.py +0 -0
  62. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/display_utils.py +0 -0
  64. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/feature_info.py +0 -0
  67. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.134 → upgini-1.2.135}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: upgini
3
- Version: 1.2.134
3
+ Version: 1.2.135
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.135"
@@ -71,6 +71,7 @@ class Dataset:
71
71
  date_column: Optional[str] = None,
72
72
  id_columns: Optional[List[str]] = None,
73
73
  is_imbalanced: bool = False,
74
+ dropped_columns: Optional[List[str]] = None,
74
75
  random_state: Optional[int] = None,
75
76
  sample_config: Optional[SampleConfig] = None,
76
77
  rest_client: Optional[_RestClient] = None,
@@ -118,6 +119,7 @@ class Dataset:
118
119
  self.is_imbalanced: bool = False
119
120
  self.id_columns = id_columns
120
121
  self.is_imbalanced = is_imbalanced
122
+ self.dropped_columns = dropped_columns
121
123
  self.date_column = date_column
122
124
  if logger is not None:
123
125
  self.logger = logger
@@ -285,6 +287,7 @@ class Dataset:
285
287
  for key in search_group
286
288
  if key in self.columns_renaming
287
289
  and not self.columns_renaming.get(key).endswith(EmailSearchKeyConverter.ONE_DOMAIN_SUFFIX)
290
+ and not self.columns_renaming.get(key) == "current_date"
288
291
  }
289
292
  ipv4_column = self.etalon_def_checked.get(FileColumnMeaningType.IP_ADDRESS.value)
290
293
  if (
@@ -475,6 +478,7 @@ class Dataset:
475
478
  hierarchicalGroupKeys=self.hierarchical_group_keys,
476
479
  hierarchicalSubgroupKeys=self.hierarchical_subgroup_keys,
477
480
  taskType=self.task_type,
481
+ droppedColumns=self.dropped_columns,
478
482
  )
479
483
 
480
484
  @staticmethod
@@ -751,7 +751,6 @@ class FeaturesEnricher(TransformerMixin):
751
751
  exclude_features_sources: list[str] | None = None,
752
752
  keep_input: bool = True,
753
753
  trace_id: str | None = None,
754
- metrics_calculation: bool = False,
755
754
  silent_mode=False,
756
755
  progress_bar: ProgressBar | None = None,
757
756
  progress_callback: Callable[[SearchProgress], Any] | None = None,
@@ -810,11 +809,12 @@ class FeaturesEnricher(TransformerMixin):
810
809
  X,
811
810
  y=y,
812
811
  exclude_features_sources=exclude_features_sources,
813
- metrics_calculation=metrics_calculation,
814
812
  silent_mode=silent_mode,
815
813
  progress_bar=progress_bar,
816
814
  keep_input=keep_input,
817
815
  )
816
+ if TARGET in result.columns:
817
+ result = result.drop(columns=TARGET)
818
818
  self.logger.info("Transform finished successfully")
819
819
  search_progress = SearchProgress(100.0, ProgressStage.FINISHED)
820
820
  if progress_bar is not None:
@@ -1047,7 +1047,8 @@ class FeaturesEnricher(TransformerMixin):
1047
1047
  with Spinner():
1048
1048
  self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
1049
1049
 
1050
- has_date = self._get_date_column(search_keys) is not None
1050
+ date_col = self._get_date_column(search_keys)
1051
+ has_date = date_col is not None and date_col in validated_X.columns
1051
1052
  model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
1052
1053
  cat_features = list(set(client_cat_features + cat_features_from_backend))
1053
1054
  has_time = has_date and isinstance(_cv, TimeSeriesSplit) or isinstance(_cv, BlockedTimeSeriesSplit)
@@ -1323,7 +1324,7 @@ class FeaturesEnricher(TransformerMixin):
1323
1324
  search_keys = {str(k): v for k, v in search_keys.items()}
1324
1325
 
1325
1326
  date_column = self._get_date_column(search_keys)
1326
- has_date = date_column is not None
1327
+ has_date = date_column is not None and date_column in validated_X.columns
1327
1328
  if not has_date:
1328
1329
  self.logger.info("No date column for OOT PSI calculation")
1329
1330
  return
@@ -1637,7 +1638,7 @@ class FeaturesEnricher(TransformerMixin):
1637
1638
 
1638
1639
  if not isinstance(_cv, BaseCrossValidator):
1639
1640
  date_column = self._get_date_column(search_keys)
1640
- date_series = X[date_column] if date_column is not None else None
1641
+ date_series = X[date_column] if date_column is not None and date_column in X.columns else None
1641
1642
  _cv, groups = CVConfig(
1642
1643
  _cv, date_series, self.random_state, self._search_task.get_shuffle_kfold(), group_columns=group_columns
1643
1644
  ).get_cv_and_groups(X)
@@ -1736,17 +1737,22 @@ class FeaturesEnricher(TransformerMixin):
1736
1737
 
1737
1738
  self.logger.info(f"Excluding search keys: {excluding_search_keys}")
1738
1739
 
1740
+ file_meta = self._search_task.get_file_metadata(trace_id)
1741
+ fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
1742
+ original_dropped_features = [columns_renaming.get(f, f) for f in fit_dropped_features]
1743
+
1739
1744
  client_features = [
1740
1745
  c
1741
- for c in (validated_X.columns.to_list() + generated_features)
1746
+ for c in validated_X.columns.to_list()
1742
1747
  if (not self.fit_select_features or c in set(self.feature_names_).union(self.id_columns or []))
1743
1748
  and c
1744
1749
  not in (
1745
1750
  excluding_search_keys
1746
- + list(self.fit_dropped_features)
1751
+ + original_dropped_features
1747
1752
  + [DateTimeConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1748
1753
  )
1749
1754
  ]
1755
+ client_features.extend(f for f in generated_features if f in self.feature_names_)
1750
1756
  if self.baseline_score_column is not None and self.baseline_score_column not in client_features:
1751
1757
  client_features.append(self.baseline_score_column)
1752
1758
  self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
@@ -1847,7 +1853,7 @@ class FeaturesEnricher(TransformerMixin):
1847
1853
  enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
1848
1854
  enriched_eval_X, eval_y_sampled, self.cv
1849
1855
  )
1850
- if date_column is not None:
1856
+ if date_column is not None and date_column in eval_X_sorted.columns:
1851
1857
  eval_set_dates[idx] = eval_X_sorted[date_column]
1852
1858
  fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
1853
1859
  fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
@@ -1936,7 +1942,9 @@ class FeaturesEnricher(TransformerMixin):
1936
1942
  and self.df_with_original_index is not None
1937
1943
  ):
1938
1944
  self.logger.info("Dataset is not imbalanced, so use enriched_X from fit")
1939
- return self.__get_enriched_from_fit(eval_set, trace_id, remove_outliers_calc_metrics)
1945
+ return self.__get_enriched_from_fit(
1946
+ validated_X, validated_y, eval_set, trace_id, remove_outliers_calc_metrics
1947
+ )
1940
1948
  else:
1941
1949
  self.logger.info(
1942
1950
  "Dataset is imbalanced or exclude_features_sources or X was passed or this is saved search."
@@ -2074,6 +2082,8 @@ class FeaturesEnricher(TransformerMixin):
2074
2082
 
2075
2083
  def __get_enriched_from_fit(
2076
2084
  self,
2085
+ validated_X: pd.DataFrame,
2086
+ validated_y: pd.Series,
2077
2087
  eval_set: list[tuple] | None,
2078
2088
  trace_id: str,
2079
2089
  remove_outliers_calc_metrics: bool | None,
@@ -2082,7 +2092,8 @@ class FeaturesEnricher(TransformerMixin):
2082
2092
  search_keys = self.fit_search_keys.copy()
2083
2093
 
2084
2094
  rows_to_drop = None
2085
- has_date = self._get_date_column(search_keys) is not None
2095
+ date_column = self._get_date_column(search_keys)
2096
+ has_date = date_column is not None and date_column in validated_X.columns
2086
2097
  self.model_task_type = self.model_task_type or define_task(
2087
2098
  self.df_with_original_index[TARGET], has_date, self.logger, silent=True
2088
2099
  )
@@ -2124,6 +2135,24 @@ class FeaturesEnricher(TransformerMixin):
2124
2135
  drop_system_record_id=False,
2125
2136
  )
2126
2137
 
2138
+ enriched_Xy.rename(columns=self.fit_columns_renaming, inplace=True)
2139
+ search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
2140
+ generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
2141
+
2142
+ validated_Xy = validated_X.copy()
2143
+ validated_Xy[TARGET] = validated_y
2144
+
2145
+ selecting_columns = self._selecting_input_and_generated_columns(
2146
+ validated_Xy, self.fit_generated_features, keep_input=True, trace_id=trace_id
2147
+ )
2148
+ selecting_columns.extend(
2149
+ c
2150
+ for c in enriched_Xy.columns
2151
+ if (c in self.feature_names_ and c not in selecting_columns and c not in validated_X.columns)
2152
+ or c in [EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SYSTEM_RECORD_ID]
2153
+ )
2154
+ enriched_Xy = enriched_Xy[selecting_columns]
2155
+
2127
2156
  # Handle eval sets extraction based on EVAL_SET_INDEX
2128
2157
  if EVAL_SET_INDEX in enriched_Xy.columns:
2129
2158
  eval_set_indices = list(enriched_Xy[EVAL_SET_INDEX].unique())
@@ -2135,7 +2164,11 @@ class FeaturesEnricher(TransformerMixin):
2135
2164
  ].copy()
2136
2165
  enriched_Xy = enriched_Xy.loc[enriched_Xy[EVAL_SET_INDEX] == 0].copy()
2137
2166
 
2138
- x_columns = [c for c in self.df_with_original_index.columns if c not in [EVAL_SET_INDEX, TARGET]]
2167
+ x_columns = [
2168
+ c
2169
+ for c in [self.fit_columns_renaming.get(k, k) for k in self.df_with_original_index.columns]
2170
+ if c not in [EVAL_SET_INDEX, TARGET] and c in selecting_columns
2171
+ ]
2139
2172
  X_sampled = enriched_Xy[x_columns].copy()
2140
2173
  y_sampled = enriched_Xy[TARGET].copy()
2141
2174
  enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
@@ -2157,15 +2190,6 @@ class FeaturesEnricher(TransformerMixin):
2157
2190
  enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
2158
2191
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
2159
2192
 
2160
- # reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
2161
- X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
2162
- enriched_X.rename(columns=self.fit_columns_renaming, inplace=True)
2163
- for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
2164
- eval_X_sampled.rename(columns=self.fit_columns_renaming, inplace=True)
2165
- enriched_eval_X.rename(columns=self.fit_columns_renaming, inplace=True)
2166
- search_keys = {self.fit_columns_renaming.get(k, k): v for k, v in search_keys.items()}
2167
- generated_features = [self.fit_columns_renaming.get(c, c) for c in self.fit_generated_features]
2168
-
2169
2193
  datasets_hash = hash_input(self.X, self.y, self.eval_set)
2170
2194
  return self.__cache_and_return_results(
2171
2195
  datasets_hash,
@@ -2642,7 +2666,7 @@ if response.status_code == 200:
2642
2666
  generated_features = [columns_renaming.get(c, c) for c in generated_features]
2643
2667
  search_keys = {columns_renaming.get(c, c): t for c, t in search_keys.items()}
2644
2668
  selecting_columns = self._selecting_input_and_generated_columns(
2645
- validated_Xy, generated_features, keep_input, trace_id
2669
+ validated_Xy, generated_features, keep_input, trace_id, is_transform=True
2646
2670
  )
2647
2671
  self.logger.warning(f"Filtered columns by existance in dataframe: {selecting_columns}")
2648
2672
  if add_fit_system_record_id:
@@ -2895,7 +2919,7 @@ if response.status_code == 200:
2895
2919
  )
2896
2920
 
2897
2921
  selecting_columns = self._selecting_input_and_generated_columns(
2898
- validated_Xy, generated_features, keep_input, trace_id
2922
+ validated_Xy, generated_features, keep_input, trace_id, is_transform=True
2899
2923
  )
2900
2924
  selecting_columns.extend(
2901
2925
  c
@@ -2933,20 +2957,24 @@ if response.status_code == 200:
2933
2957
  generated_features: list[str],
2934
2958
  keep_input: bool,
2935
2959
  trace_id: str,
2960
+ is_transform: bool = False,
2936
2961
  ):
2937
- fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2938
- new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2939
-
2940
- selected_generated_features = [
2941
- c for c in generated_features if c in self.feature_names_
2962
+ file_meta = self._search_task.get_file_metadata(trace_id)
2963
+ fit_dropped_features = self.fit_dropped_features or file_meta.droppedColumns or []
2964
+ fit_input_columns = [c.originalName for c in file_meta.columns]
2965
+ original_dropped_features = [self.fit_columns_renaming.get(c, c) for c in fit_dropped_features]
2966
+ new_columns_on_transform = [
2967
+ c for c in validated_Xy.columns if c not in fit_input_columns and c not in original_dropped_features
2942
2968
  ]
2969
+
2970
+ selected_generated_features = [c for c in generated_features if c in self.feature_names_]
2943
2971
  if keep_input is True:
2944
2972
  selected_input_columns = [
2945
2973
  c
2946
2974
  for c in validated_Xy.columns
2947
2975
  if not self.fit_select_features
2948
2976
  or c in self.feature_names_
2949
- or c in new_columns_on_transform
2977
+ or (c in new_columns_on_transform and is_transform)
2950
2978
  or c in self.search_keys
2951
2979
  or c in (self.id_columns or [])
2952
2980
  or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
@@ -3112,7 +3140,7 @@ if response.status_code == 200:
3112
3140
  self.fit_search_keys = self.__prepare_search_keys(df, self.fit_search_keys, is_demo_dataset)
3113
3141
 
3114
3142
  maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
3115
- has_date = maybe_date_column is not None
3143
+ has_date = maybe_date_column is not None and maybe_date_column in validated_X.columns
3116
3144
 
3117
3145
  self.model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
3118
3146
 
@@ -3358,6 +3386,7 @@ if response.status_code == 200:
3358
3386
  cv_type=self.cv,
3359
3387
  id_columns=self.__get_renamed_id_columns(),
3360
3388
  is_imbalanced=self.imbalanced,
3389
+ dropped_columns=[self.fit_columns_renaming.get(f, f) for f in self.fit_dropped_features],
3361
3390
  date_column=self._get_date_column(self.fit_search_keys),
3362
3391
  date_format=self.date_format,
3363
3392
  random_state=self.random_state,
@@ -3746,7 +3775,8 @@ if response.status_code == 200:
3746
3775
  if eval_set is None:
3747
3776
  return None
3748
3777
  validated_eval_set = []
3749
- has_date = self._get_date_column(self.search_keys) is not None
3778
+ date_col = self._get_date_column(self.search_keys)
3779
+ has_date = date_col is not None and date_col in X.columns
3750
3780
  for idx, eval_pair in enumerate(eval_set):
3751
3781
  validated_pair = self._validate_eval_set_pair(X, eval_pair)
3752
3782
  if validated_pair[1].isna().all():
@@ -252,6 +252,7 @@ class FileMetadata(BaseModel):
252
252
  rowsCount: Optional[int] = None
253
253
  checksumMD5: Optional[str] = None
254
254
  digest: Optional[str] = None
255
+ droppedColumns: Optional[List[str]] = None
255
256
 
256
257
  def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
257
258
  for c in self.columns:
@@ -1,6 +1,5 @@
1
1
  import datetime
2
2
  import logging
3
- import re
4
3
  from typing import Dict, List, Optional
5
4
 
6
5
  import numpy as np
@@ -67,7 +66,7 @@ class DateTimeConverter:
67
66
  try:
68
67
  if s is None or len(str(s).strip()) == 0:
69
68
  return None
70
- if not re.match(DATETIME_PATTERN, str(s)):
69
+ if sum(ch.isdigit() for ch in str(s)) < 6:
71
70
  return None
72
71
  return s
73
72
  except Exception:
@@ -116,7 +115,7 @@ class DateTimeConverter:
116
115
  else:
117
116
  return None
118
117
  else:
119
- date_col = date_col.astype("string") # .apply(self.clean_date)
118
+ date_col = date_col.astype("string").apply(self.clean_date)
120
119
  parsed_datetime = self.parse_string_date(date_col.to_frame(self.date_column), raise_errors)
121
120
  if parsed_datetime.isna().all():
122
121
  raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
@@ -44,12 +44,14 @@ class FeaturesValidator:
44
44
  else:
45
45
  empty_or_constant_features.append(f)
46
46
 
47
+ columns_renaming = columns_renaming or {}
48
+
47
49
  if one_hot_encoded_features:
48
- msg = bundle.get("one_hot_encoded_features").format(one_hot_encoded_features)
50
+ msg = bundle.get("one_hot_encoded_features").format(
51
+ [columns_renaming.get(f, f) for f in one_hot_encoded_features]
52
+ )
49
53
  warnings.append(msg)
50
54
 
51
- columns_renaming = columns_renaming or {}
52
-
53
55
  if empty_or_constant_features:
54
56
  msg = bundle.get("empty_or_contant_features").format(
55
57
  [columns_renaming.get(f, f) for f in empty_or_constant_features]
@@ -1 +0,0 @@
1
- __version__ = "1.2.134"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes