upgini 1.2.118__tar.gz → 1.2.120__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {upgini-1.2.118 → upgini-1.2.120}/PKG-INFO +1 -1
  2. upgini-1.2.120/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/features_enricher.py +74 -48
  4. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/display_utils.py +12 -9
  5. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/sklearn_ext.py +3 -4
  6. upgini-1.2.118/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.118 → upgini-1.2.120}/.gitignore +0 -0
  8. {upgini-1.2.118 → upgini-1.2.120}/LICENSE +0 -0
  9. {upgini-1.2.118 → upgini-1.2.120}/README.md +0 -0
  10. {upgini-1.2.118 → upgini-1.2.120}/pyproject.toml +0 -0
  11. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/all_operators.py +0 -0
  17. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/operator.py +0 -0
  22. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/__init__.py +0 -0
  23. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/base.py +0 -0
  24. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/cross.py +0 -0
  25. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/delta.py +0 -0
  26. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/lag.py +0 -0
  27. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/roll.py +0 -0
  28. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/trend.py +0 -0
  29. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/timeseries/volatility.py +0 -0
  30. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/unary.py +0 -0
  31. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/utils.py +0 -0
  32. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/autofe/vector.py +0 -0
  33. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/data_source/__init__.py +0 -0
  34. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/data_source/data_source_publisher.py +0 -0
  35. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/dataset.py +0 -0
  36. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/errors.py +0 -0
  37. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/http.py +0 -0
  38. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/mdc/__init__.py +0 -0
  39. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/mdc/context.py +0 -0
  40. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/metadata.py +0 -0
  41. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/metrics.py +0 -0
  42. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/normalizer/__init__.py +0 -0
  43. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/normalizer/normalize_utils.py +0 -0
  44. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/resource_bundle/__init__.py +0 -0
  45. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/resource_bundle/exceptions.py +0 -0
  46. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/resource_bundle/strings.properties +0 -0
  47. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  48. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/sampler/__init__.py +0 -0
  49. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/sampler/base.py +0 -0
  50. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/sampler/random_under_sampler.py +0 -0
  51. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/sampler/utils.py +0 -0
  52. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/search_task.py +0 -0
  53. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/spinner.py +0 -0
  54. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  55. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/__init__.py +0 -0
  56. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/base_search_key_detector.py +0 -0
  57. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/blocked_time_series.py +0 -0
  58. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/config.py +0 -0
  59. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/country_utils.py +0 -0
  60. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/custom_loss_utils.py +0 -0
  61. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/cv_utils.py +0 -0
  62. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/datetime_utils.py +0 -0
  63. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/deduplicate_utils.py +0 -0
  64. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/feature_info.py +0 -0
  67. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/features_validator.py +0 -0
  68. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/format.py +0 -0
  69. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/hash_utils.py +0 -0
  70. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/ip_utils.py +0 -0
  71. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/mstats.py +0 -0
  72. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/phone_utils.py +0 -0
  73. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/postal_code_utils.py +0 -0
  74. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/progress_bar.py +0 -0
  75. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/psi.py +0 -0
  76. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/sample_utils.py +0 -0
  77. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.118 → upgini-1.2.120}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.118
3
+ Version: 1.2.120
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.120"
@@ -854,7 +854,7 @@ class FeaturesEnricher(TransformerMixin):
854
854
  raise e
855
855
  finally:
856
856
  self.logger.info(f"Transform elapsed time: {time.time() - start_time}")
857
-
857
+
858
858
  return result
859
859
 
860
860
  def calculate_metrics(
@@ -1028,13 +1028,7 @@ class FeaturesEnricher(TransformerMixin):
1028
1028
  columns_renaming,
1029
1029
  _,
1030
1030
  ) = prepared_data
1031
-
1032
- # rename baseline_score_column
1033
- reversed_renaming = {v: k for k, v in columns_renaming.items()}
1034
- baseline_score_column = self.baseline_score_column
1035
- if baseline_score_column is not None:
1036
- baseline_score_column = reversed_renaming[baseline_score_column]
1037
-
1031
+
1038
1032
  gc.collect()
1039
1033
 
1040
1034
  if fitting_X.shape[1] == 0 and fitting_enriched_X.shape[1] == 0:
@@ -1089,7 +1083,7 @@ class FeaturesEnricher(TransformerMixin):
1089
1083
  has_time=has_time,
1090
1084
  )
1091
1085
  baseline_cv_result = baseline_estimator.cross_val_predict(
1092
- fitting_X, y_sorted, baseline_score_column
1086
+ fitting_X, y_sorted, self.baseline_score_column
1093
1087
  )
1094
1088
  baseline_metric = baseline_cv_result.get_display_metric()
1095
1089
  if baseline_metric is None:
@@ -1192,7 +1186,7 @@ class FeaturesEnricher(TransformerMixin):
1192
1186
  f"on client features: {eval_X_sorted.columns.to_list()}"
1193
1187
  )
1194
1188
  etalon_eval_results = baseline_estimator.calculate_metric(
1195
- eval_X_sorted, eval_y_sorted, baseline_score_column
1189
+ eval_X_sorted, eval_y_sorted, self.baseline_score_column
1196
1190
  )
1197
1191
  etalon_eval_metric = etalon_eval_results.get_display_metric()
1198
1192
  self.logger.info(
@@ -1741,7 +1735,8 @@ class FeaturesEnricher(TransformerMixin):
1741
1735
  c
1742
1736
  for c in (validated_X.columns.to_list() + generated_features)
1743
1737
  if (not self.fit_select_features or c in set(self.feature_names_).union(self.id_columns or []))
1744
- and c not in (
1738
+ and c
1739
+ not in (
1745
1740
  excluding_search_keys
1746
1741
  + list(self.fit_dropped_features)
1747
1742
  + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
@@ -2215,7 +2210,8 @@ class FeaturesEnricher(TransformerMixin):
2215
2210
  progress_callback=progress_callback,
2216
2211
  add_fit_system_record_id=True,
2217
2212
  )
2218
- if enriched_df is None:
2213
+ if enriched_df is None or len(enriched_df) == 0 or len(enriched_df.columns) == 0:
2214
+ self.logger.warning(f"Empty enriched dataframe returned: {enriched_df}, returning None")
2219
2215
  return None
2220
2216
 
2221
2217
  x_columns = [
@@ -2500,6 +2496,9 @@ if response.status_code == 200:
2500
2496
  ) -> tuple[pd.DataFrame, dict[str, str], list[str], dict[str, SearchKey]]:
2501
2497
  if self._search_task is None:
2502
2498
  raise NotFittedError(self.bundle.get("transform_unfitted_enricher"))
2499
+ features_meta = self._search_task.get_all_features_metadata_v2()
2500
+ if features_meta is None:
2501
+ raise NotFittedError(self.bundle.get("transform_unfitted_enricher"))
2503
2502
 
2504
2503
  start_time = time.time()
2505
2504
  search_id = self.search_id or (self._search_task.search_task_id if self._search_task is not None else None)
@@ -2519,7 +2518,7 @@ if response.status_code == 200:
2519
2518
  if len(self.feature_names_) == 0:
2520
2519
  msg = self.bundle.get("no_important_features_for_transform")
2521
2520
  self.__log_warning(msg, show_support_link=True)
2522
- return X, {c: c for c in X.columns}, [], dict()
2521
+ return None, {}, [], self.search_keys
2523
2522
 
2524
2523
  self.__validate_search_keys(self.search_keys, self.search_id)
2525
2524
 
@@ -2527,9 +2526,8 @@ if response.status_code == 200:
2527
2526
  msg = self.bundle.get("transform_with_paid_features")
2528
2527
  self.logger.warning(msg)
2529
2528
  self.__display_support_link(msg)
2530
- return None, {c: c for c in X.columns}, [], {}
2529
+ return None, {}, [], self.search_keys
2531
2530
 
2532
- features_meta = self._search_task.get_all_features_metadata_v2()
2533
2531
  online_api_features = [fm.name for fm in features_meta if fm.from_online_api and fm.shap_value > 0]
2534
2532
  if len(online_api_features) > 0:
2535
2533
  self.logger.warning(
@@ -2550,7 +2548,7 @@ if response.status_code == 200:
2550
2548
  self.logger.warning(msg)
2551
2549
  print(msg)
2552
2550
  show_request_quote_button()
2553
- return None, {c: c for c in X.columns}, [], {}
2551
+ return None, {}, [], {}
2554
2552
  else:
2555
2553
  msg = self.bundle.get("transform_usage_info").format(
2556
2554
  transform_usage.limit, transform_usage.transformed_rows
@@ -2620,14 +2618,33 @@ if response.status_code == 200:
2620
2618
 
2621
2619
  # If there are no external features, we don't call backend on transform
2622
2620
  external_features = [fm for fm in features_meta if fm.shap_value > 0 and fm.source != "etalon"]
2623
- if not external_features:
2621
+ if len(external_features) == 0:
2624
2622
  self.logger.warning(
2625
2623
  "No external features found, returning original dataframe"
2626
2624
  f" with generated important features: {self.feature_names_}"
2627
2625
  )
2628
- filtered_columns = [c for c in self.feature_names_ if c in df.columns]
2629
- self.logger.warning(f"Filtered columns by existance in dataframe: {filtered_columns}")
2630
- return df[filtered_columns], columns_renaming, generated_features, search_keys
2626
+ df = df.rename(columns=columns_renaming)
2627
+ generated_features = [columns_renaming.get(c, c) for c in generated_features]
2628
+ search_keys = {columns_renaming.get(c, c): t for c, t in search_keys.items()}
2629
+ selecting_columns = self._selecting_input_and_generated_columns(
2630
+ validated_Xy, generated_features, keep_input, trace_id
2631
+ )
2632
+ self.logger.warning(f"Filtered columns by existance in dataframe: {selecting_columns}")
2633
+ if add_fit_system_record_id:
2634
+ df = self._add_fit_system_record_id(
2635
+ df,
2636
+ search_keys,
2637
+ SYSTEM_RECORD_ID,
2638
+ TARGET,
2639
+ columns_renaming,
2640
+ self.id_columns,
2641
+ self.cv,
2642
+ self.model_task_type,
2643
+ self.logger,
2644
+ self.bundle,
2645
+ )
2646
+ selecting_columns.append(SYSTEM_RECORD_ID)
2647
+ return df[selecting_columns], columns_renaming, generated_features, search_keys
2631
2648
 
2632
2649
  # Don't pass all features in backend on transform
2633
2650
  runtime_parameters = self._get_copy_of_runtime_parameters()
@@ -2845,29 +2862,12 @@ if response.status_code == 200:
2845
2862
  how="left",
2846
2863
  )
2847
2864
 
2848
- fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2849
- new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2850
-
2851
- selected_generated_features = [
2852
- c for c in generated_features if not self.fit_select_features or c in self.feature_names_
2853
- ]
2854
- if keep_input is True:
2855
- selected_input_columns = [
2856
- c
2857
- for c in validated_Xy.columns
2858
- if not self.fit_select_features
2859
- or c in self.feature_names_
2860
- or c in new_columns_on_transform
2861
- or c in self.search_keys
2862
- or c in (self.id_columns or [])
2863
- or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
2864
- ]
2865
- else:
2866
- selected_input_columns = []
2867
-
2868
- selecting_columns = selected_input_columns + selected_generated_features
2865
+ selecting_columns = self._selecting_input_and_generated_columns(
2866
+ validated_Xy, generated_features, keep_input, trace_id
2867
+ )
2869
2868
  selecting_columns.extend(
2870
- c for c in result.columns
2869
+ c
2870
+ for c in result.columns
2871
2871
  if c in self.feature_names_ and c not in selecting_columns and c not in validated_Xy.columns
2872
2872
  )
2873
2873
  if add_fit_system_record_id:
@@ -2895,6 +2895,35 @@ if response.status_code == 200:
2895
2895
 
2896
2896
  return result, columns_renaming, generated_features, search_keys
2897
2897
 
2898
+ def _selecting_input_and_generated_columns(
2899
+ self,
2900
+ validated_Xy: pd.DataFrame,
2901
+ generated_features: list[str],
2902
+ keep_input: bool,
2903
+ trace_id: str,
2904
+ ):
2905
+ fit_input_columns = [c.originalName for c in self._search_task.get_file_metadata(trace_id).columns]
2906
+ new_columns_on_transform = [c for c in validated_Xy.columns if c not in fit_input_columns]
2907
+
2908
+ selected_generated_features = [
2909
+ c for c in generated_features if not self.fit_select_features or c in self.feature_names_
2910
+ ]
2911
+ if keep_input is True:
2912
+ selected_input_columns = [
2913
+ c
2914
+ for c in validated_Xy.columns
2915
+ if not self.fit_select_features
2916
+ or c in self.feature_names_
2917
+ or c in new_columns_on_transform
2918
+ or c in self.search_keys
2919
+ or c in (self.id_columns or [])
2920
+ or c in [EVAL_SET_INDEX, TARGET] # transform for metrics calculation
2921
+ ]
2922
+ else:
2923
+ selected_input_columns = []
2924
+
2925
+ return selected_input_columns + selected_generated_features
2926
+
2898
2927
  def __validate_search_keys(self, search_keys: dict[str, SearchKey], search_id: str | None = None):
2899
2928
  if (search_keys is None or len(search_keys) == 0) and self.country_code is None:
2900
2929
  if search_id:
@@ -3349,6 +3378,7 @@ if response.status_code == 200:
3349
3378
  except KeyboardInterrupt as e:
3350
3379
  print(self.bundle.get("search_stopping"))
3351
3380
  self.rest_client.stop_search_task_v2(trace_id, self._search_task.search_task_id)
3381
+ self._search_task = None
3352
3382
  self.logger.warning(f"Search {self._search_task.search_task_id} stopped by user")
3353
3383
  print(self.bundle.get("search_stopped"))
3354
3384
  raise e
@@ -3727,9 +3757,7 @@ if response.status_code == 200:
3727
3757
  eval_types = validated_eval_X.dtypes
3728
3758
  # Find columns with different types
3729
3759
  diff_cols = [
3730
- (col, x_types[col], eval_types[col])
3731
- for col in x_types.index
3732
- if x_types[col] != eval_types[col]
3760
+ (col, x_types[col], eval_types[col]) for col in x_types.index if x_types[col] != eval_types[col]
3733
3761
  ]
3734
3762
  diff_col_names = [col for col, _, _ in diff_cols]
3735
3763
  # print columns with different types
@@ -3815,9 +3843,7 @@ if response.status_code == 200:
3815
3843
  return Xy[X.columns].copy(), Xy[TARGET].copy()
3816
3844
 
3817
3845
  @staticmethod
3818
- def _sort_by_system_record_id(
3819
- X: pd.DataFrame, y: pd.Series, cv: CVType | None
3820
- ) -> tuple[pd.DataFrame, pd.Series]:
3846
+ def _sort_by_system_record_id(X: pd.DataFrame, y: pd.Series, cv: CVType | None) -> tuple[pd.DataFrame, pd.Series]:
3821
3847
  if cv not in [CVType.time_series, CVType.blocked_time_series]:
3822
3848
  record_id_column = ENTITY_SYSTEM_RECORD_ID if ENTITY_SYSTEM_RECORD_ID in X else SYSTEM_RECORD_ID
3823
3849
  Xy = X.copy()
@@ -269,19 +269,22 @@ def make_html_report(
269
269
  if search_keys is not None
270
270
  else ""
271
271
  }
272
- {"<h3>All relevant features. Accuracy after enrichment</h3>" + make_table(metrics_df)
273
- if metrics_df is not None
274
- else ""
272
+ {
273
+ "<h3>All relevant features. Accuracy after enrichment</h3>" + make_table(metrics_df)
274
+ if metrics_df is not None
275
+ else ""
275
276
  }
276
- {"<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
277
- if len(relevant_datasources_df) > 0
278
- else ""
277
+ {
278
+ "<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
279
+ if len(relevant_datasources_df) > 0
280
+ else ""
279
281
  }
280
282
  <h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
281
283
  {make_table(relevant_features_df, wrap_long_string=25)}
282
- {"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
283
- if autofe_descriptions_df is not None
284
- else ""
284
+ {
285
+ "<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
286
+ if autofe_descriptions_df is not None and len(autofe_descriptions_df) > 0
287
+ else ""
285
288
  }
286
289
  <p>To buy found data sources, please contact: <a href='mailto:sales@upgini.com'>sales@upgini.com</a></p>
287
290
  <p>Best regards, </br><b>Upgini Team</b></p>
@@ -1301,6 +1301,7 @@ def _encode_cat_features(X_train, y_train, X_test, y_test, cat_features, estimat
1301
1301
  encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
1302
1302
  encoder.fit(X_train[cat_features], y_train)
1303
1303
 
1304
+ # OrdinalEncoder doesn't support progressive encoding with target
1304
1305
  X_train[cat_features] = encoder.transform(X_train[cat_features]).astype(int)
1305
1306
  X_test[cat_features] = encoder.transform(X_test[cat_features]).astype(int)
1306
1307
 
@@ -1314,10 +1315,8 @@ def _encode_cat_features(X_train, y_train, X_test, y_test, cat_features, estimat
1314
1315
  encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
1315
1316
  encoder.fit(X_train[cat_features], y_train)
1316
1317
 
1317
- # Progressive encoding on train (using y)
1318
- X_train[cat_features] = encoder.transform(X_train[cat_features], y_train).astype(int)
1319
-
1320
- # Static encoding on validation (no y)
1318
+ # OrdinalEncoder doesn't support progressive encoding with target
1319
+ X_train[cat_features] = encoder.transform(X_train[cat_features]).astype(int)
1321
1320
  X_test[cat_features] = encoder.transform(X_test[cat_features]).astype(int)
1322
1321
 
1323
1322
  return X_train, y_train, X_test, y_test, [], encoder
@@ -1 +0,0 @@
1
- __version__ = "1.2.118"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes