upgini 1.2.8__py3-none-any.whl → 1.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +13 -22
- upgini/metrics.py +4 -0
- upgini/utils/features_validator.py +1 -1
- {upgini-1.2.8.dist-info → upgini-1.2.9.dist-info}/METADATA +1 -1
- {upgini-1.2.8.dist-info → upgini-1.2.9.dist-info}/RECORD +8 -8
- {upgini-1.2.8.dist-info → upgini-1.2.9.dist-info}/WHEEL +0 -0
- {upgini-1.2.8.dist-info → upgini-1.2.9.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.8"
|
|
1
|
+
__version__ = "1.2.9"
|
upgini/features_enricher.py
CHANGED
|
@@ -1103,7 +1103,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1103
1103
|
else:
|
|
1104
1104
|
eval_uplift = None
|
|
1105
1105
|
|
|
1106
|
-
effective_eval_set = eval_set if eval_set is not None else self.eval_set
|
|
1106
|
+
# effective_eval_set = eval_set if eval_set is not None else self.eval_set
|
|
1107
1107
|
eval_metrics = {
|
|
1108
1108
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
|
1109
1109
|
"quality_metrics_eval_segment"
|
|
@@ -1369,6 +1369,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1369
1369
|
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1370
1370
|
)
|
|
1371
1371
|
]
|
|
1372
|
+
self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
|
|
1372
1373
|
|
|
1373
1374
|
filtered_enriched_features = self.__filtered_enriched_features(
|
|
1374
1375
|
importance_threshold,
|
|
@@ -1435,31 +1436,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1435
1436
|
)
|
|
1436
1437
|
|
|
1437
1438
|
fitting_eval_set_dict = {}
|
|
1439
|
+
fitting_x_columns = fitting_X.columns.to_list()
|
|
1440
|
+
self.logger.info(f"Final list of fitting X columns: {fitting_x_columns}")
|
|
1441
|
+
fitting_enriched_x_columns = fitting_enriched_X.columns.to_list()
|
|
1442
|
+
self.logger.info(f"Final list of fitting enriched X columns: {fitting_enriched_x_columns}")
|
|
1438
1443
|
for idx, eval_tuple in eval_set_sampled_dict.items():
|
|
1439
1444
|
eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
|
|
1440
1445
|
eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
|
|
1441
1446
|
enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
|
|
1442
1447
|
enriched_eval_X, eval_y_sampled, self.cv
|
|
1443
1448
|
)
|
|
1444
|
-
fitting_eval_X = eval_X_sorted[
|
|
1445
|
-
fitting_enriched_eval_X = enriched_eval_X_sorted[
|
|
1446
|
-
|
|
1447
|
-
].copy()
|
|
1448
|
-
|
|
1449
|
-
# # Drop high cardinality features in eval set
|
|
1450
|
-
if len(columns_with_high_cardinality) > 0:
|
|
1451
|
-
fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
|
|
1452
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
|
|
1453
|
-
columns=columns_with_high_cardinality, errors="ignore"
|
|
1454
|
-
)
|
|
1455
|
-
# Drop constant features in eval_set
|
|
1456
|
-
if len(constant_columns) > 0:
|
|
1457
|
-
fitting_eval_X = fitting_eval_X.drop(columns=constant_columns, errors="ignore")
|
|
1458
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=constant_columns, errors="ignore")
|
|
1459
|
-
# Drop datetime features in eval_set
|
|
1460
|
-
if len(datetime_features) > 0:
|
|
1461
|
-
fitting_eval_X = fitting_eval_X.drop(columns=datetime_features, errors="ignore")
|
|
1462
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=datetime_features, errors="ignore")
|
|
1449
|
+
fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
|
|
1450
|
+
fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
|
|
1451
|
+
|
|
1463
1452
|
# Convert bool to string in eval_set
|
|
1464
1453
|
if len(bool_columns) > 0:
|
|
1465
1454
|
fitting_eval_X[col] = fitting_eval_X[col].astype(str)
|
|
@@ -1680,6 +1669,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1680
1669
|
X_sampled = enriched_Xy[x_columns].copy()
|
|
1681
1670
|
y_sampled = enriched_Xy[TARGET].copy()
|
|
1682
1671
|
enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
|
|
1672
|
+
enriched_X_columns = enriched_X.columns.to_list()
|
|
1683
1673
|
|
|
1684
1674
|
self.logger.info(f"Shape of enriched_X: {enriched_X.shape}")
|
|
1685
1675
|
self.logger.info(f"Shape of X after sampling: {X_sampled.shape}")
|
|
@@ -1694,7 +1684,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1694
1684
|
for idx in range(len(eval_set)):
|
|
1695
1685
|
eval_X_sampled = enriched_eval_sets[idx + 1][x_columns].copy()
|
|
1696
1686
|
eval_y_sampled = enriched_eval_sets[idx + 1][TARGET].copy()
|
|
1697
|
-
enriched_eval_X = enriched_eval_sets[idx + 1].
|
|
1687
|
+
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
|
1698
1688
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
|
1699
1689
|
|
|
1700
1690
|
self.__cached_sampled_datasets = (
|
|
@@ -1773,12 +1763,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1773
1763
|
X_sampled = enriched_Xy[x_columns].copy()
|
|
1774
1764
|
y_sampled = enriched_Xy[TARGET].copy()
|
|
1775
1765
|
enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX])
|
|
1766
|
+
enriched_X_columns = enriched_X.columns.tolist()
|
|
1776
1767
|
|
|
1777
1768
|
for idx in range(len(eval_set)):
|
|
1778
1769
|
enriched_eval_xy = enriched_df.query(f"{EVAL_SET_INDEX} == {idx + 1}")
|
|
1779
1770
|
eval_x_sampled = enriched_eval_xy[x_columns].copy()
|
|
1780
1771
|
eval_y_sampled = enriched_eval_xy[TARGET].copy()
|
|
1781
|
-
enriched_eval_x = enriched_eval_xy.
|
|
1772
|
+
enriched_eval_x = enriched_eval_xy[enriched_X_columns].copy()
|
|
1782
1773
|
eval_set_sampled_dict[idx] = (eval_x_sampled, enriched_eval_x, eval_y_sampled)
|
|
1783
1774
|
else:
|
|
1784
1775
|
self.logger.info("Transform without eval_set")
|
upgini/metrics.py
CHANGED
|
@@ -254,6 +254,7 @@ class EstimatorWrapper:
|
|
|
254
254
|
def _prepare_data(
|
|
255
255
|
self, x: pd.DataFrame, y: pd.Series, groups: Optional[np.ndarray] = None
|
|
256
256
|
) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
|
|
257
|
+
self.logger.info(f"Before preparing data columns: {x.columns.to_list()}")
|
|
257
258
|
for c in x.columns:
|
|
258
259
|
if is_numeric_dtype(x[c]):
|
|
259
260
|
x[c] = x[c].astype(float)
|
|
@@ -272,6 +273,7 @@ class EstimatorWrapper:
|
|
|
272
273
|
else:
|
|
273
274
|
x, y = self._remove_empty_target_rows(x, y)
|
|
274
275
|
|
|
276
|
+
self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
|
|
275
277
|
return x, y, groups
|
|
276
278
|
|
|
277
279
|
def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series]:
|
|
@@ -493,7 +495,9 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
493
495
|
if x[name].nunique() > 1:
|
|
494
496
|
unique_cat_features.append(name)
|
|
495
497
|
else:
|
|
498
|
+
self.logger.info(f"Drop column {name} on preparing data for fit")
|
|
496
499
|
x = x.drop(columns=name)
|
|
500
|
+
self.exclude_features.append(name)
|
|
497
501
|
self.cat_features = unique_cat_features
|
|
498
502
|
if (
|
|
499
503
|
hasattr(self.estimator, "get_param")
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=Oh3Y6CIypkhAjW-aquBTyP3_cA-gKgKTwq9EpcWpjps,22
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=rC3Lq1KvwJdSiITAUfYzBxdRtPkpOo6X2fqc3wWQfM4,187594
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
10
|
+
upgini/metrics.py,sha256=Swp-innl6XrdK6Dy6uLTVxmkzPRqFbCxfYxQUsK_-w8,31222
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
13
|
upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1389
|
|
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
|
|
|
47
47
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
48
48
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
49
49
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
50
|
-
upgini/utils/features_validator.py,sha256=
|
|
50
|
+
upgini/utils/features_validator.py,sha256=LIF6YMpHlxCrVz6mvMpc1kfNTIMVGlNCor7IJTmlSfI,3307
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
52
|
upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
|
|
53
53
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
-
upgini-1.2.8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
-
upgini-1.2.8.dist-info/RECORD,,
|
|
60
|
+
upgini-1.2.9.dist-info/METADATA,sha256=3mB0qUIeWVpka3vMeXq-t7STUZxVKQb5NpGBpFA9zlw,48607
|
|
61
|
+
upgini-1.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.9.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|