upgini 1.2.113a2__tar.gz → 1.2.113a4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {upgini-1.2.113a2 → upgini-1.2.113a4}/PKG-INFO +1 -1
  2. upgini-1.2.113a4/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/features_enricher.py +59 -89
  4. upgini-1.2.113a2/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.113a2 → upgini-1.2.113a4}/.gitignore +0 -0
  6. {upgini-1.2.113a2 → upgini-1.2.113a4}/LICENSE +0 -0
  7. {upgini-1.2.113a2 → upgini-1.2.113a4}/README.md +0 -0
  8. {upgini-1.2.113a2 → upgini-1.2.113a4}/pyproject.toml +0 -0
  9. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/all_operators.py +0 -0
  15. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/operator.py +0 -0
  20. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/__init__.py +0 -0
  21. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/base.py +0 -0
  22. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/cross.py +0 -0
  23. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/delta.py +0 -0
  24. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/lag.py +0 -0
  25. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/roll.py +0 -0
  26. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/trend.py +0 -0
  27. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/volatility.py +0 -0
  28. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/unary.py +0 -0
  29. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/utils.py +0 -0
  30. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/vector.py +0 -0
  31. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/data_source/__init__.py +0 -0
  32. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/data_source/data_source_publisher.py +0 -0
  33. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/dataset.py +0 -0
  34. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/errors.py +0 -0
  35. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/http.py +0 -0
  36. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/mdc/__init__.py +0 -0
  37. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/mdc/context.py +0 -0
  38. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/metadata.py +0 -0
  39. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/metrics.py +0 -0
  40. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/normalizer/__init__.py +0 -0
  41. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/normalizer/normalize_utils.py +0 -0
  42. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/country_utils.py +0 -0
  57. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/custom_loss_utils.py +0 -0
  58. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/cv_utils.py +0 -0
  59. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/datetime_utils.py +0 -0
  60. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/deduplicate_utils.py +0 -0
  61. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/display_utils.py +0 -0
  62. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/email_utils.py +0 -0
  63. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  64. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/feature_info.py +0 -0
  65. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/features_validator.py +0 -0
  66. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/format.py +0 -0
  67. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/ip_utils.py +0 -0
  68. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/mstats.py +0 -0
  69. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/phone_utils.py +0 -0
  70. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/postal_code_utils.py +0 -0
  71. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/progress_bar.py +0 -0
  72. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/psi.py +0 -0
  73. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sample_utils.py +0 -0
  74. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sklearn_ext.py +0 -0
  75. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sort.py +0 -0
  76. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/target_utils.py +0 -0
  77. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/track_info.py +0 -0
  78. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/ts_utils.py +0 -0
  79. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/warning_counter.py +0 -0
  80. {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/version_validator.py +0 -0
{upgini-1.2.113a2 → upgini-1.2.113a4}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: upgini
- Version: 1.2.113a2
+ Version: 1.2.113a4
  Summary: Intelligent data search & enrichment for Machine Learning
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.113a4/src/upgini/__about__.py
@@ -0,0 +1 @@
+ __version__ = "1.2.113a4"
{upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/features_enricher.py
@@ -1003,29 +1003,31 @@ class FeaturesEnricher(TransformerMixin):
  return None

  cat_features_from_backend = self.__get_categorical_features()
+ # Convert to original names
+ cat_features_from_backend = [self.fit_columns_renaming.get(c, c) for c in cat_features_from_backend]
  client_cat_features, search_keys_for_metrics = self._get_and_validate_client_cat_features(
  estimator, validated_X, self.search_keys
  )
+ # Exclude id columns from cat_features
  if self.id_columns and self.id_columns_encoder is not None:
  if cat_features_from_backend:
  cat_features_from_backend = [
  c
  for c in cat_features_from_backend
- if self.fit_columns_renaming.get(c, c) not in self.id_columns_encoder.feature_names_in_
+ if c not in self.id_columns_encoder.feature_names_in_
  ]
  if client_cat_features:
  client_cat_features = [
  c
  for c in client_cat_features
- if self.fit_columns_renaming.get(c, c) not in self.id_columns_encoder.feature_names_in_
+ if c not in self.id_columns_encoder.feature_names_in_
  ]
  for cat_feature in cat_features_from_backend:
- original_cat_feature = self.fit_columns_renaming.get(cat_feature)
- if original_cat_feature in self.search_keys:
- if self.search_keys[original_cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
- search_keys_for_metrics.append(original_cat_feature)
+ if cat_feature in self.search_keys:
+ if self.search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
+ search_keys_for_metrics.append(cat_feature)
  else:
- self.logger.warning(self.bundle.get("cat_feature_search_key").format(original_cat_feature))
+ self.logger.warning(self.bundle.get("cat_feature_search_key").format(cat_feature))
  search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
  self.logger.info(f"Search keys for metrics: {search_keys_for_metrics}")
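Note on the pattern above: backend categorical feature names are mapped back to the client's original column names up front, so later checks (such as dropping id columns via the encoder's feature_names_in_) compare plain original names. A minimal, self-contained sketch of that filtering, with hypothetical column names and renaming dict rather than upgini's internals:

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

# Hypothetical internal-to-original renaming and backend feature list.
fit_columns_renaming = {"f_0": "merchant_id", "f_1": "country"}
cat_features_from_backend = ["f_0", "f_1", "f_2"]

# Convert to original names.
cat_features_from_backend = [fit_columns_renaming.get(c, c) for c in cat_features_from_backend]

# An encoder fitted on the id columns remembers the column names it saw.
id_columns_encoder = OrdinalEncoder().fit(pd.DataFrame({"merchant_id": ["a", "b"]}))

# Exclude id columns from the categorical feature list.
cat_features_from_backend = [
    c for c in cat_features_from_backend
    if c not in id_columns_encoder.feature_names_in_
]
print(cat_features_from_backend)  # ['country', 'f_2']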
@@ -1057,24 +1059,9 @@ class FeaturesEnricher(TransformerMixin):
  groups,
  _cv,
  columns_renaming,
- eval_set_dates,
+ _,
  ) = prepared_data

- # rename cat_features
- if client_cat_features:
- for new_c, old_c in columns_renaming.items():
- if old_c in client_cat_features:
- client_cat_features.remove(old_c)
- client_cat_features.append(new_c)
- for cat_feature in client_cat_features:
- if cat_feature not in fitting_X.columns:
- self.logger.error(
- f"Client cat_feature `{cat_feature}` not found in"
- f" x columns: {fitting_X.columns.to_list()}"
- )
- else:
- client_cat_features = []
-
  # rename baseline_score_column
  reversed_renaming = {v: k for k, v in columns_renaming.items()}
  baseline_score_column = self.baseline_score_column
@@ -1303,7 +1290,7 @@ class FeaturesEnricher(TransformerMixin):
  metrics.append(eval_metrics)

  if updating_shaps is not None:
- decoded_X = self._decode_id_columns(fitting_X, columns_renaming)
+ decoded_X = self._decode_id_columns(fitting_X)
  self._update_shap_values(trace_id, decoded_X, updating_shaps, silent=not internal_call)

  metrics_df = pd.DataFrame(metrics)
@@ -1374,12 +1361,23 @@ class FeaturesEnricher(TransformerMixin):
  if isinstance(X, np.ndarray):
  search_keys = {str(k): v for k, v in search_keys.items()}

- has_date = self._get_date_column(search_keys) is not None
- if not has_date or not validated_eval_set:
- self.logger.info("No date column or eval set for OOT psi calculation")
+ date_column = self._get_date_column(search_keys)
+ has_date = date_column is not None
+ if not has_date:
+ self.logger.info("No date column for OOT PSI calculation")
+ return
+ if not validated_eval_set:
+ self.logger.info("No eval set for OOT PSI calculation")
+ return
+ if validated_X[date_column].nunique() <= 1:
+ self.logger.warning("Constant date for OOT PSI calculation")
+ return
+ if self.cv is not None and self.cv.is_time_series():
+ self.logger.warning("Time series CV is not supported for OOT PSI calculation")
  return

  cat_features_from_backend = self.__get_categorical_features()
+ cat_features_from_backend = [self.fit_columns_renaming.get(c, c) for c in cat_features_from_backend]
  client_cat_features, search_keys_for_metrics = self._get_and_validate_client_cat_features(
  estimator, validated_X, search_keys
  )
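The combined precondition check above is split into separate guard clauses, so the log states exactly why the OOT PSI calculation was skipped (no date column, no eval set, constant date, or time-series CV). A standalone sketch of the same guard-clause structure; the function and parameters are illustrative, not part of the upgini API:

import logging

import pandas as pd

logger = logging.getLogger(__name__)


def can_calculate_oot_psi(X: pd.DataFrame, date_column, eval_set, cv) -> bool:
    # Each precondition gets its own early exit and its own log message.
    if date_column is None:
        logger.info("No date column for OOT PSI calculation")
        return False
    if not eval_set:
        logger.info("No eval set for OOT PSI calculation")
        return False
    if X[date_column].nunique() <= 1:
        logger.warning("Constant date for OOT PSI calculation")
        return False
    if cv is not None and cv.is_time_series():
        logger.warning("Time series CV is not supported for OOT PSI calculation")
        return False
    return True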
@@ -1388,13 +1386,13 @@ class FeaturesEnricher(TransformerMixin):
  cat_features_from_backend = [
  c
  for c in cat_features_from_backend
- if self.fit_columns_renaming.get(c, c) not in self.id_columns_encoder.feature_names_in_
+ if c not in self.id_columns_encoder.feature_names_in_
  ]
  if client_cat_features:
  client_cat_features = [
  c
  for c in client_cat_features
- if self.fit_columns_renaming.get(c, c) not in self.id_columns_encoder.feature_names_in_
+ if c not in self.id_columns_encoder.feature_names_in_
  ]

  prepared_data = self._prepare_data_for_metrics(
@@ -1429,20 +1427,6 @@ class FeaturesEnricher(TransformerMixin):
  eval_set_dates,
  ) = prepared_data

- # rename cat_features
- if client_cat_features:
- for new_c, old_c in columns_renaming.items():
- if old_c in client_cat_features:
- client_cat_features.remove(old_c)
- client_cat_features.append(new_c)
- for cat_feature in client_cat_features:
- if cat_feature not in fitting_X.columns:
- self.logger.error(
- f"Client cat_feature `{cat_feature}` not found in" f" x columns: {fitting_X.columns.to_list()}"
- )
- else:
- client_cat_features = []
-
  model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
  cat_features = list(set(client_cat_features + cat_features_from_backend))
@@ -1494,14 +1478,6 @@ class FeaturesEnricher(TransformerMixin):
  # Find latest eval set or earliest if all eval sets are before train set
  date_column = self._get_date_column(search_keys)

- if (
- date_column is None
- or not eval_set
- or not eval_set_dates
- or (self.cv is not None and self.cv.is_time_series())
- ):
- return []
-
  # Get minimum date from main dataset X
  main_min_date = X[date_column].min()
@@ -1755,7 +1731,7 @@ class FeaturesEnricher(TransformerMixin):
  def _get_and_validate_client_cat_features(
  self, estimator: Optional[Any], X: pd.DataFrame, search_keys: Dict[str, SearchKey]
  ) -> Tuple[Optional[List[str]], List[str]]:
- cat_features = None
+ cat_features = []
  search_keys_for_metrics = []
  if (
  estimator is not None
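A plausible reason for the new [] default (inferred from this diff, not from a changelog): the renaming blocks that previously normalized a None value to [] were removed above, and a later step concatenates the client list with the backend list, which would fail on None. A tiny illustration with made-up values:

client_cat_features = []                         # previously None when the estimator supplied no cat features
cat_features_from_backend = ["country", "city"]

# With None on the left this would raise:
# TypeError: unsupported operand type(s) for +: 'NoneType' and 'list'
cat_features = list(set(client_cat_features + cat_features_from_backend))
print(sorted(cat_features))  # ['city', 'country']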
@@ -1924,7 +1900,7 @@ class FeaturesEnricher(TransformerMixin):
  fitting_X, y_sorted, search_keys, self.model_task_type, sort_all_columns=True, logger=self.logger
  )
  fitting_X = fitting_X[fitting_x_columns]
- fitting_X, _ = self._encode_id_columns(fitting_X, self.fit_columns_renaming)
+ fitting_X, _ = self._encode_id_columns(fitting_X)
  self.logger.info(f"Final sorted list of fitting X columns: {fitting_x_columns}")
  fitting_enriched_x_columns = fitting_enriched_X.columns.to_list()
  fitting_enriched_x_columns = sort_columns(
@@ -1936,7 +1912,7 @@ class FeaturesEnricher(TransformerMixin):
  logger=self.logger,
  )
  fitting_enriched_X = fitting_enriched_X[fitting_enriched_x_columns]
- fitting_enriched_X, _ = self._encode_id_columns(fitting_enriched_X, self.fit_columns_renaming)
+ fitting_enriched_X, _ = self._encode_id_columns(fitting_enriched_X)
  self.logger.info(f"Final sorted list of fitting enriched X columns: {fitting_enriched_x_columns}")
  date_column = self._get_date_column(search_keys)
  eval_set_dates = {}
@@ -1968,8 +1944,8 @@ class FeaturesEnricher(TransformerMixin):
  .astype(np.float64)
  )

- fitting_eval_X, unknown_dict = self._encode_id_columns(fitting_eval_X, self.fit_columns_renaming)
- fitting_enriched_eval_X, _ = self._encode_id_columns(fitting_enriched_eval_X, self.fit_columns_renaming)
+ fitting_eval_X, unknown_dict = self._encode_id_columns(fitting_eval_X)
+ fitting_enriched_eval_X, _ = self._encode_id_columns(fitting_enriched_eval_X)

  if len(unknown_dict) > 0:
  print(self.bundle.get("unknown_id_column_value_in_eval_set").format(unknown_dict))
@@ -3203,7 +3179,7 @@ if response.status_code == 200:
  is_numeric_dtype(df[self.TARGET_NAME])
  and self.model_task_type in [ModelTaskType.BINARY, ModelTaskType.MULTICLASS]
  and has_date
- and not self.cv.is_time_series()
+ and (self.cv is None or not self.cv.is_time_series())
  ):
  self._validate_PSI(df.sort_values(by=maybe_date_column))
@@ -3236,8 +3212,7 @@ if response.status_code == 200:
  self.fit_generated_features = [f for f in self.fit_generated_features if f not in self.fit_dropped_features]

  # Group columns should have normalized names
- self.cv = None
- self.__adjust_cv(df)
+ self.__adjust_cv(df, force=True)
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
  id_columns = self.__get_renamed_id_columns()
  if id_columns:
@@ -3542,19 +3517,21 @@ if response.status_code == 200:
  reverse_renaming = {v: k for k, v in renaming.items()}
  return None if self.id_columns is None else [reverse_renaming.get(c) or c for c in self.id_columns]

- def __adjust_cv(self, df: pd.DataFrame):
+ def __adjust_cv(self, df: pd.DataFrame, force: bool = False):
+ if self.cv is not None and not force:
+ return
+
  date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
  # Check Multivariate time series
  if (
- self.cv is None
- and date_column
+ date_column
  and self.model_task_type == ModelTaskType.REGRESSION
  and len({SearchKey.PHONE, SearchKey.EMAIL, SearchKey.HEM}.intersection(self.fit_search_keys.keys())) == 0
  and is_blocked_time_series(df, date_column, list(self.fit_search_keys.keys()) + [TARGET])
  ):
  msg = self.bundle.get("multivariate_timeseries_detected")
  self.__override_cv(CVType.blocked_time_series, msg, print_warning=False)
- elif self.cv is None and self.model_task_type != ModelTaskType.REGRESSION:
+ elif self.model_task_type != ModelTaskType.REGRESSION:
  msg = self.bundle.get("group_k_fold_in_classification")
  self.__override_cv(CVType.group_k_fold, msg, print_warning=self.cv is not None)
  group_columns = self._get_group_columns(df, self.fit_search_keys)
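The force flag replaces the earlier pattern of resetting self.cv = None before re-detection: a user-supplied CV is now respected unless the caller explicitly forces re-detection (as fit() does with force=True). A simplified, hypothetical sketch of that early-return pattern, not the actual FeaturesEnricher class:

class EnricherSketch:
    def __init__(self, cv=None):
        self.cv = cv  # user-configured cross-validation type, if any

    def adjust_cv(self, detected_cv, force: bool = False):
        # Keep a user-supplied CV unless the caller explicitly forces re-detection.
        if self.cv is not None and not force:
            return
        self.cv = detected_cv


e = EnricherSketch(cv="time_series")
e.adjust_cv("group_k_fold")              # no-op: the user's choice wins
e.adjust_cv("group_k_fold", force=True)  # fit-time call re-runs detection
print(e.cv)  # group_k_fold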
@@ -3592,39 +3569,32 @@ if response.status_code == 200:
  def _encode_id_columns(
  self,
  X: pd.DataFrame,
- columns_renaming: Optional[Dict[str, str]] = None,
  ) -> Tuple[pd.DataFrame, Dict[str, List[Any]]]:
- columns_renaming = columns_renaming or {}
  unknown_dict = {}

  if self.id_columns and self.id_columns_encoder is not None:
- inverse_columns_renaming = {v: k for k, v in columns_renaming.items()}
- renamed_id_columns = [
- inverse_columns_renaming.get(col, col) for col in self.id_columns_encoder.feature_names_in_
- ]
- self.logger.info(f"Convert id columns to int: {renamed_id_columns}")
- encoded = self.id_columns_encoder.transform(X[renamed_id_columns].rename(columns=columns_renaming))
- for i, c in enumerate(renamed_id_columns):
- unknown_values = X[encoded[:, i] == -1][c].unique().tolist()
- if len(unknown_values) > 0:
- unknown_dict[c] = unknown_values
- X[renamed_id_columns] = encoded
- X = X.loc[(X[renamed_id_columns] != -1).all(axis=1)]
-
- if len(unknown_dict) > 0:
- self.logger.warning(f"Unknown values in id columns: {unknown_dict}")
+ encoding_id_columns = [c for c in self.id_columns if c in X.columns]
+ if len(encoding_id_columns) > 0:
+ self.logger.info(f"Convert id columns to int: {encoding_id_columns}")
+ encoded = self.id_columns_encoder.transform(X[encoding_id_columns])
+ for i, c in enumerate(encoding_id_columns):
+ unknown_values = X[encoded[:, i] == -1][c].unique().tolist()
+ if len(unknown_values) > 0:
+ unknown_dict[c] = unknown_values
+ X[encoding_id_columns] = encoded
+ X = X.loc[(X[encoding_id_columns] != -1).all(axis=1)]
+
+ if len(unknown_dict) > 0:
+ self.logger.warning(f"Unknown values in id columns: {unknown_dict}")

  return X, unknown_dict

- def _decode_id_columns(self, X: pd.DataFrame, columns_renaming: Dict[str, str]):
- columns_renaming = columns_renaming or {}
+ def _decode_id_columns(self, X: pd.DataFrame):
  if self.id_columns and self.id_columns_encoder is not None:
- inverse_columns_renaming = {v: k for k, v in columns_renaming.items()}
- renamed_id_columns = [
- inverse_columns_renaming.get(col, col) for col in self.id_columns_encoder.feature_names_in_
- ]
- decoded = self.id_columns_encoder.inverse_transform(X[renamed_id_columns].rename(columns=columns_renaming))
- X[renamed_id_columns] = decoded
+ decoding_id_columns = [c for c in self.id_columns if c in X.columns]
+ if len(decoding_id_columns) > 0:
+ decoded = self.id_columns_encoder.inverse_transform(X[self.id_columns])
+ X[self.id_columns] = decoded

  return X
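The simplified _encode_id_columns no longer threads a renaming map through: it encodes whichever id columns are present in the frame, records values the encoder never saw (encoded as -1), and drops those rows. A standalone sketch with hypothetical data, using scikit-learn's OrdinalEncoder as a stand-in for the fitted id-column encoder:

import pandas as pd
from sklearn.preprocessing import OrdinalEncoder

train = pd.DataFrame({"store_id": ["a", "b", "c"], "target": [0, 1, 0]})
eval_df = pd.DataFrame({"store_id": ["b", "z"], "target": [1, 0]})  # "z" was never seen at fit time

id_columns = ["store_id"]
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
encoder.fit(train[id_columns])

# Encode only the id columns actually present in this frame.
encoding_id_columns = [c for c in id_columns if c in eval_df.columns]
encoded = encoder.transform(eval_df[encoding_id_columns])

# Collect unknown values per column, then drop rows that could not be encoded.
unknown_dict = {}
for i, c in enumerate(encoding_id_columns):
    unknown_values = eval_df.loc[encoded[:, i] == -1, c].unique().tolist()
    if unknown_values:
        unknown_dict[c] = unknown_values
eval_df[encoding_id_columns] = encoded
eval_df = eval_df.loc[(eval_df[encoding_id_columns] != -1).all(axis=1)]

print(unknown_dict)  # {'store_id': ['z']}
print(eval_df)       # only the row whose store_id was seen during fit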
@@ -4170,7 +4140,7 @@ if response.status_code == 200:
  columns_to_sort = [date_column] if date_column is not None else []

  do_sorting = True
- if self.id_columns and self.cv.is_time_series():
+ if self.id_columns and self.cv is not None and self.cv.is_time_series():
  # Check duplicates by date and id_columns
  reversed_columns_renaming = {v: k for k, v in columns_renaming.items()}
  renamed_id_columns = [reversed_columns_renaming.get(c, c) for c in self.id_columns]
upgini-1.2.113a2/src/upgini/__about__.py
@@ -1 +0,0 @@
- __version__ = "1.2.113a2"