upgini 1.2.113a2__tar.gz → 1.2.113a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.113a2 → upgini-1.2.113a4}/PKG-INFO +1 -1
- upgini-1.2.113a4/src/upgini/__about__.py +1 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/features_enricher.py +59 -89
- upgini-1.2.113a2/src/upgini/__about__.py +0 -1
- {upgini-1.2.113a2 → upgini-1.2.113a4}/.gitignore +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/LICENSE +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/README.md +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/pyproject.toml +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/dataset.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/errors.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/http.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/metadata.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/metrics.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/search_task.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/spinner.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.113a2 → upgini-1.2.113a4}/src/upgini/version_validator.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.113a4"
|
@@ -1003,29 +1003,31 @@ class FeaturesEnricher(TransformerMixin):
|
|
1003
1003
|
return None
|
1004
1004
|
|
1005
1005
|
cat_features_from_backend = self.__get_categorical_features()
|
1006
|
+
# Convert to original names
|
1007
|
+
cat_features_from_backend = [self.fit_columns_renaming.get(c, c) for c in cat_features_from_backend]
|
1006
1008
|
client_cat_features, search_keys_for_metrics = self._get_and_validate_client_cat_features(
|
1007
1009
|
estimator, validated_X, self.search_keys
|
1008
1010
|
)
|
1011
|
+
# Exclude id columns from cat_features
|
1009
1012
|
if self.id_columns and self.id_columns_encoder is not None:
|
1010
1013
|
if cat_features_from_backend:
|
1011
1014
|
cat_features_from_backend = [
|
1012
1015
|
c
|
1013
1016
|
for c in cat_features_from_backend
|
1014
|
-
if
|
1017
|
+
if c not in self.id_columns_encoder.feature_names_in_
|
1015
1018
|
]
|
1016
1019
|
if client_cat_features:
|
1017
1020
|
client_cat_features = [
|
1018
1021
|
c
|
1019
1022
|
for c in client_cat_features
|
1020
|
-
if
|
1023
|
+
if c not in self.id_columns_encoder.feature_names_in_
|
1021
1024
|
]
|
1022
1025
|
for cat_feature in cat_features_from_backend:
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
search_keys_for_metrics.append(original_cat_feature)
|
1026
|
+
if cat_feature in self.search_keys:
|
1027
|
+
if self.search_keys[cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
|
1028
|
+
search_keys_for_metrics.append(cat_feature)
|
1027
1029
|
else:
|
1028
|
-
self.logger.warning(self.bundle.get("cat_feature_search_key").format(
|
1030
|
+
self.logger.warning(self.bundle.get("cat_feature_search_key").format(cat_feature))
|
1029
1031
|
search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
|
1030
1032
|
self.logger.info(f"Search keys for metrics: {search_keys_for_metrics}")
|
1031
1033
|
|
@@ -1057,24 +1059,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
1057
1059
|
groups,
|
1058
1060
|
_cv,
|
1059
1061
|
columns_renaming,
|
1060
|
-
|
1062
|
+
_,
|
1061
1063
|
) = prepared_data
|
1062
1064
|
|
1063
|
-
# rename cat_features
|
1064
|
-
if client_cat_features:
|
1065
|
-
for new_c, old_c in columns_renaming.items():
|
1066
|
-
if old_c in client_cat_features:
|
1067
|
-
client_cat_features.remove(old_c)
|
1068
|
-
client_cat_features.append(new_c)
|
1069
|
-
for cat_feature in client_cat_features:
|
1070
|
-
if cat_feature not in fitting_X.columns:
|
1071
|
-
self.logger.error(
|
1072
|
-
f"Client cat_feature `{cat_feature}` not found in"
|
1073
|
-
f" x columns: {fitting_X.columns.to_list()}"
|
1074
|
-
)
|
1075
|
-
else:
|
1076
|
-
client_cat_features = []
|
1077
|
-
|
1078
1065
|
# rename baseline_score_column
|
1079
1066
|
reversed_renaming = {v: k for k, v in columns_renaming.items()}
|
1080
1067
|
baseline_score_column = self.baseline_score_column
|
@@ -1303,7 +1290,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1303
1290
|
metrics.append(eval_metrics)
|
1304
1291
|
|
1305
1292
|
if updating_shaps is not None:
|
1306
|
-
decoded_X = self._decode_id_columns(fitting_X
|
1293
|
+
decoded_X = self._decode_id_columns(fitting_X)
|
1307
1294
|
self._update_shap_values(trace_id, decoded_X, updating_shaps, silent=not internal_call)
|
1308
1295
|
|
1309
1296
|
metrics_df = pd.DataFrame(metrics)
|
@@ -1374,12 +1361,23 @@ class FeaturesEnricher(TransformerMixin):
|
|
1374
1361
|
if isinstance(X, np.ndarray):
|
1375
1362
|
search_keys = {str(k): v for k, v in search_keys.items()}
|
1376
1363
|
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1364
|
+
date_column = self._get_date_column(search_keys)
|
1365
|
+
has_date = date_column is not None
|
1366
|
+
if not has_date:
|
1367
|
+
self.logger.info("No date column for OOT PSI calculation")
|
1368
|
+
return
|
1369
|
+
if not validated_eval_set:
|
1370
|
+
self.logger.info("No eval set for OOT PSI calculation")
|
1371
|
+
return
|
1372
|
+
if validated_X[date_column].nunique() <= 1:
|
1373
|
+
self.logger.warning("Constant date for OOT PSI calculation")
|
1374
|
+
return
|
1375
|
+
if self.cv is not None and self.cv.is_time_series():
|
1376
|
+
self.logger.warning("Time series CV is not supported for OOT PSI calculation")
|
1380
1377
|
return
|
1381
1378
|
|
1382
1379
|
cat_features_from_backend = self.__get_categorical_features()
|
1380
|
+
cat_features_from_backend = [self.fit_columns_renaming.get(c, c) for c in cat_features_from_backend]
|
1383
1381
|
client_cat_features, search_keys_for_metrics = self._get_and_validate_client_cat_features(
|
1384
1382
|
estimator, validated_X, search_keys
|
1385
1383
|
)
|
@@ -1388,13 +1386,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
1388
1386
|
cat_features_from_backend = [
|
1389
1387
|
c
|
1390
1388
|
for c in cat_features_from_backend
|
1391
|
-
if
|
1389
|
+
if c not in self.id_columns_encoder.feature_names_in_
|
1392
1390
|
]
|
1393
1391
|
if client_cat_features:
|
1394
1392
|
client_cat_features = [
|
1395
1393
|
c
|
1396
1394
|
for c in client_cat_features
|
1397
|
-
if
|
1395
|
+
if c not in self.id_columns_encoder.feature_names_in_
|
1398
1396
|
]
|
1399
1397
|
|
1400
1398
|
prepared_data = self._prepare_data_for_metrics(
|
@@ -1429,20 +1427,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
1429
1427
|
eval_set_dates,
|
1430
1428
|
) = prepared_data
|
1431
1429
|
|
1432
|
-
# rename cat_features
|
1433
|
-
if client_cat_features:
|
1434
|
-
for new_c, old_c in columns_renaming.items():
|
1435
|
-
if old_c in client_cat_features:
|
1436
|
-
client_cat_features.remove(old_c)
|
1437
|
-
client_cat_features.append(new_c)
|
1438
|
-
for cat_feature in client_cat_features:
|
1439
|
-
if cat_feature not in fitting_X.columns:
|
1440
|
-
self.logger.error(
|
1441
|
-
f"Client cat_feature `{cat_feature}` not found in" f" x columns: {fitting_X.columns.to_list()}"
|
1442
|
-
)
|
1443
|
-
else:
|
1444
|
-
client_cat_features = []
|
1445
|
-
|
1446
1430
|
model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
|
1447
1431
|
cat_features = list(set(client_cat_features + cat_features_from_backend))
|
1448
1432
|
|
@@ -1494,14 +1478,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
1494
1478
|
# Find latest eval set or earliest if all eval sets are before train set
|
1495
1479
|
date_column = self._get_date_column(search_keys)
|
1496
1480
|
|
1497
|
-
if (
|
1498
|
-
date_column is None
|
1499
|
-
or not eval_set
|
1500
|
-
or not eval_set_dates
|
1501
|
-
or (self.cv is not None and self.cv.is_time_series())
|
1502
|
-
):
|
1503
|
-
return []
|
1504
|
-
|
1505
1481
|
# Get minimum date from main dataset X
|
1506
1482
|
main_min_date = X[date_column].min()
|
1507
1483
|
|
@@ -1755,7 +1731,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1755
1731
|
def _get_and_validate_client_cat_features(
|
1756
1732
|
self, estimator: Optional[Any], X: pd.DataFrame, search_keys: Dict[str, SearchKey]
|
1757
1733
|
) -> Tuple[Optional[List[str]], List[str]]:
|
1758
|
-
cat_features =
|
1734
|
+
cat_features = []
|
1759
1735
|
search_keys_for_metrics = []
|
1760
1736
|
if (
|
1761
1737
|
estimator is not None
|
@@ -1924,7 +1900,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1924
1900
|
fitting_X, y_sorted, search_keys, self.model_task_type, sort_all_columns=True, logger=self.logger
|
1925
1901
|
)
|
1926
1902
|
fitting_X = fitting_X[fitting_x_columns]
|
1927
|
-
fitting_X, _ = self._encode_id_columns(fitting_X
|
1903
|
+
fitting_X, _ = self._encode_id_columns(fitting_X)
|
1928
1904
|
self.logger.info(f"Final sorted list of fitting X columns: {fitting_x_columns}")
|
1929
1905
|
fitting_enriched_x_columns = fitting_enriched_X.columns.to_list()
|
1930
1906
|
fitting_enriched_x_columns = sort_columns(
|
@@ -1936,7 +1912,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1936
1912
|
logger=self.logger,
|
1937
1913
|
)
|
1938
1914
|
fitting_enriched_X = fitting_enriched_X[fitting_enriched_x_columns]
|
1939
|
-
fitting_enriched_X, _ = self._encode_id_columns(fitting_enriched_X
|
1915
|
+
fitting_enriched_X, _ = self._encode_id_columns(fitting_enriched_X)
|
1940
1916
|
self.logger.info(f"Final sorted list of fitting enriched X columns: {fitting_enriched_x_columns}")
|
1941
1917
|
date_column = self._get_date_column(search_keys)
|
1942
1918
|
eval_set_dates = {}
|
@@ -1968,8 +1944,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
1968
1944
|
.astype(np.float64)
|
1969
1945
|
)
|
1970
1946
|
|
1971
|
-
fitting_eval_X, unknown_dict = self._encode_id_columns(fitting_eval_X
|
1972
|
-
fitting_enriched_eval_X, _ = self._encode_id_columns(fitting_enriched_eval_X
|
1947
|
+
fitting_eval_X, unknown_dict = self._encode_id_columns(fitting_eval_X)
|
1948
|
+
fitting_enriched_eval_X, _ = self._encode_id_columns(fitting_enriched_eval_X)
|
1973
1949
|
|
1974
1950
|
if len(unknown_dict) > 0:
|
1975
1951
|
print(self.bundle.get("unknown_id_column_value_in_eval_set").format(unknown_dict))
|
@@ -3203,7 +3179,7 @@ if response.status_code == 200:
|
|
3203
3179
|
is_numeric_dtype(df[self.TARGET_NAME])
|
3204
3180
|
and self.model_task_type in [ModelTaskType.BINARY, ModelTaskType.MULTICLASS]
|
3205
3181
|
and has_date
|
3206
|
-
and not self.cv.is_time_series()
|
3182
|
+
and (self.cv is None or not self.cv.is_time_series())
|
3207
3183
|
):
|
3208
3184
|
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
3209
3185
|
|
@@ -3236,8 +3212,7 @@ if response.status_code == 200:
|
|
3236
3212
|
self.fit_generated_features = [f for f in self.fit_generated_features if f not in self.fit_dropped_features]
|
3237
3213
|
|
3238
3214
|
# Group columns should have normalized names
|
3239
|
-
self.
|
3240
|
-
self.__adjust_cv(df)
|
3215
|
+
self.__adjust_cv(df, force=True)
|
3241
3216
|
if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
|
3242
3217
|
id_columns = self.__get_renamed_id_columns()
|
3243
3218
|
if id_columns:
|
@@ -3542,19 +3517,21 @@ if response.status_code == 200:
|
|
3542
3517
|
reverse_renaming = {v: k for k, v in renaming.items()}
|
3543
3518
|
return None if self.id_columns is None else [reverse_renaming.get(c) or c for c in self.id_columns]
|
3544
3519
|
|
3545
|
-
def __adjust_cv(self, df: pd.DataFrame):
|
3520
|
+
def __adjust_cv(self, df: pd.DataFrame, force: bool = False):
|
3521
|
+
if self.cv is not None and not force:
|
3522
|
+
return
|
3523
|
+
|
3546
3524
|
date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
|
3547
3525
|
# Check Multivariate time series
|
3548
3526
|
if (
|
3549
|
-
|
3550
|
-
and date_column
|
3527
|
+
date_column
|
3551
3528
|
and self.model_task_type == ModelTaskType.REGRESSION
|
3552
3529
|
and len({SearchKey.PHONE, SearchKey.EMAIL, SearchKey.HEM}.intersection(self.fit_search_keys.keys())) == 0
|
3553
3530
|
and is_blocked_time_series(df, date_column, list(self.fit_search_keys.keys()) + [TARGET])
|
3554
3531
|
):
|
3555
3532
|
msg = self.bundle.get("multivariate_timeseries_detected")
|
3556
3533
|
self.__override_cv(CVType.blocked_time_series, msg, print_warning=False)
|
3557
|
-
elif self.
|
3534
|
+
elif self.model_task_type != ModelTaskType.REGRESSION:
|
3558
3535
|
msg = self.bundle.get("group_k_fold_in_classification")
|
3559
3536
|
self.__override_cv(CVType.group_k_fold, msg, print_warning=self.cv is not None)
|
3560
3537
|
group_columns = self._get_group_columns(df, self.fit_search_keys)
|
@@ -3592,39 +3569,32 @@ if response.status_code == 200:
|
|
3592
3569
|
def _encode_id_columns(
|
3593
3570
|
self,
|
3594
3571
|
X: pd.DataFrame,
|
3595
|
-
columns_renaming: Optional[Dict[str, str]] = None,
|
3596
3572
|
) -> Tuple[pd.DataFrame, Dict[str, List[Any]]]:
|
3597
|
-
columns_renaming = columns_renaming or {}
|
3598
3573
|
unknown_dict = {}
|
3599
3574
|
|
3600
3575
|
if self.id_columns and self.id_columns_encoder is not None:
|
3601
|
-
|
3602
|
-
|
3603
|
-
|
3604
|
-
|
3605
|
-
|
3606
|
-
|
3607
|
-
|
3608
|
-
|
3609
|
-
|
3610
|
-
|
3611
|
-
|
3612
|
-
|
3613
|
-
|
3614
|
-
if len(unknown_dict) > 0:
|
3615
|
-
self.logger.warning(f"Unknown values in id columns: {unknown_dict}")
|
3576
|
+
encoding_id_columns = [c for c in self.id_columns if c in X.columns]
|
3577
|
+
if len(encoding_id_columns) > 0:
|
3578
|
+
self.logger.info(f"Convert id columns to int: {encoding_id_columns}")
|
3579
|
+
encoded = self.id_columns_encoder.transform(X[encoding_id_columns])
|
3580
|
+
for i, c in enumerate(encoding_id_columns):
|
3581
|
+
unknown_values = X[encoded[:, i] == -1][c].unique().tolist()
|
3582
|
+
if len(unknown_values) > 0:
|
3583
|
+
unknown_dict[c] = unknown_values
|
3584
|
+
X[encoding_id_columns] = encoded
|
3585
|
+
X = X.loc[(X[encoding_id_columns] != -1).all(axis=1)]
|
3586
|
+
|
3587
|
+
if len(unknown_dict) > 0:
|
3588
|
+
self.logger.warning(f"Unknown values in id columns: {unknown_dict}")
|
3616
3589
|
|
3617
3590
|
return X, unknown_dict
|
3618
3591
|
|
3619
|
-
def _decode_id_columns(self, X: pd.DataFrame
|
3620
|
-
columns_renaming = columns_renaming or {}
|
3592
|
+
def _decode_id_columns(self, X: pd.DataFrame):
|
3621
3593
|
if self.id_columns and self.id_columns_encoder is not None:
|
3622
|
-
|
3623
|
-
|
3624
|
-
|
3625
|
-
|
3626
|
-
decoded = self.id_columns_encoder.inverse_transform(X[renamed_id_columns].rename(columns=columns_renaming))
|
3627
|
-
X[renamed_id_columns] = decoded
|
3594
|
+
decoding_id_columns = [c for c in self.id_columns if c in X.columns]
|
3595
|
+
if len(decoding_id_columns) > 0:
|
3596
|
+
decoded = self.id_columns_encoder.inverse_transform(X[self.id_columns])
|
3597
|
+
X[self.id_columns] = decoded
|
3628
3598
|
|
3629
3599
|
return X
|
3630
3600
|
|
@@ -4170,7 +4140,7 @@ if response.status_code == 200:
|
|
4170
4140
|
columns_to_sort = [date_column] if date_column is not None else []
|
4171
4141
|
|
4172
4142
|
do_sorting = True
|
4173
|
-
if self.id_columns and self.cv.is_time_series():
|
4143
|
+
if self.id_columns and self.cv is not None and self.cv.is_time_series():
|
4174
4144
|
# Check duplicates by date and id_columns
|
4175
4145
|
reversed_columns_renaming = {v: k for k, v in columns_renaming.items()}
|
4176
4146
|
renamed_id_columns = [reversed_columns_renaming.get(c, c) for c in self.id_columns]
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.2.113a2"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|