upgini 1.2.9a101__py3-none-any.whl → 1.2.9a103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +11 -21
- {upgini-1.2.9a101.dist-info → upgini-1.2.9a103.dist-info}/METADATA +1 -1
- {upgini-1.2.9a101.dist-info → upgini-1.2.9a103.dist-info}/RECORD +6 -6
- {upgini-1.2.9a101.dist-info → upgini-1.2.9a103.dist-info}/WHEEL +0 -0
- {upgini-1.2.9a101.dist-info → upgini-1.2.9a103.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.9a101"
|
|
1
|
+
__version__ = "1.2.9a103"
|
upgini/features_enricher.py
CHANGED
|
@@ -1369,6 +1369,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1369
1369
|
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1370
1370
|
)
|
|
1371
1371
|
]
|
|
1372
|
+
self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
|
|
1372
1373
|
|
|
1373
1374
|
filtered_enriched_features = self.__filtered_enriched_features(
|
|
1374
1375
|
importance_threshold,
|
|
@@ -1435,31 +1436,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1435
1436
|
)
|
|
1436
1437
|
|
|
1437
1438
|
fitting_eval_set_dict = {}
|
|
1439
|
+
fitting_x_columns = fitting_X.columns.to_list()
|
|
1440
|
+
self.logger.info(f"Final list of fitting X columns: {fitting_x_columns}")
|
|
1441
|
+
fitting_enriched_x_columns = fitting_enriched_X.columns.to_list()
|
|
1442
|
+
self.logger.info(f"Final list of fitting enriched X columns: {fitting_enriched_x_columns}")
|
|
1438
1443
|
for idx, eval_tuple in eval_set_sampled_dict.items():
|
|
1439
1444
|
eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
|
|
1440
1445
|
eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
|
|
1441
1446
|
enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
|
|
1442
1447
|
enriched_eval_X, eval_y_sampled, self.cv
|
|
1443
1448
|
)
|
|
1444
|
-
fitting_eval_X = eval_X_sorted[
|
|
1445
|
-
fitting_enriched_eval_X = enriched_eval_X_sorted[
|
|
1446
|
-
|
|
1447
|
-
].copy()
|
|
1448
|
-
|
|
1449
|
-
# # Drop high cardinality features in eval set
|
|
1450
|
-
if len(columns_with_high_cardinality) > 0:
|
|
1451
|
-
fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
|
|
1452
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
|
|
1453
|
-
columns=columns_with_high_cardinality, errors="ignore"
|
|
1454
|
-
)
|
|
1455
|
-
# Drop constant features in eval_set
|
|
1456
|
-
if len(constant_columns) > 0:
|
|
1457
|
-
fitting_eval_X = fitting_eval_X.drop(columns=constant_columns, errors="ignore")
|
|
1458
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=constant_columns, errors="ignore")
|
|
1459
|
-
# Drop datetime features in eval_set
|
|
1460
|
-
if len(datetime_features) > 0:
|
|
1461
|
-
fitting_eval_X = fitting_eval_X.drop(columns=datetime_features, errors="ignore")
|
|
1462
|
-
fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=datetime_features, errors="ignore")
|
|
1449
|
+
fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
|
|
1450
|
+
fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
|
|
1451
|
+
|
|
1463
1452
|
# Convert bool to string in eval_set
|
|
1464
1453
|
if len(bool_columns) > 0:
|
|
1465
1454
|
fitting_eval_X[col] = fitting_eval_X[col].astype(str)
|
|
@@ -1680,6 +1669,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1680
1669
|
X_sampled = enriched_Xy[x_columns].copy()
|
|
1681
1670
|
y_sampled = enriched_Xy[TARGET].copy()
|
|
1682
1671
|
enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
|
|
1672
|
+
enriched_X_columns = enriched_X.columns.to_list()
|
|
1683
1673
|
|
|
1684
1674
|
self.logger.info(f"Shape of enriched_X: {enriched_X.shape}")
|
|
1685
1675
|
self.logger.info(f"Shape of X after sampling: {X_sampled.shape}")
|
|
@@ -1694,7 +1684,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1694
1684
|
for idx in range(len(eval_set)):
|
|
1695
1685
|
eval_X_sampled = enriched_eval_sets[idx + 1][x_columns].copy()
|
|
1696
1686
|
eval_y_sampled = enriched_eval_sets[idx + 1][TARGET].copy()
|
|
1697
|
-
enriched_eval_X = enriched_eval_sets[idx + 1].
|
|
1687
|
+
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
|
1698
1688
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
|
1699
1689
|
|
|
1700
1690
|
self.__cached_sampled_datasets = (
|
|
@@ -1779,7 +1769,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1779
1769
|
enriched_eval_xy = enriched_df.query(f"{EVAL_SET_INDEX} == {idx + 1}")
|
|
1780
1770
|
eval_x_sampled = enriched_eval_xy[x_columns].copy()
|
|
1781
1771
|
eval_y_sampled = enriched_eval_xy[TARGET].copy()
|
|
1782
|
-
enriched_eval_x =
|
|
1772
|
+
enriched_eval_x = enriched_eval_xy[enriched_X_columns].copy()
|
|
1783
1773
|
eval_set_sampled_dict[idx] = (eval_x_sampled, enriched_eval_x, eval_y_sampled)
|
|
1784
1774
|
else:
|
|
1785
1775
|
self.logger.info("Transform without eval_set")
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=o549W6DzVxIhZOMLLPwCZL5F6Uhjpdh3LlU4o1btKU8,26
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=Jqqcyz3RjWmgHeHARPym2P0W4YSSjwqPj-Yf4Ul4x58,187592
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
60
|
+
upgini-1.2.9a103.dist-info/METADATA,sha256=G7fkiWyFYEld9G3yo5DVdJS7hmsxgprl5AzpTl6WuQU,48611
|
|
61
|
+
upgini-1.2.9a103.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.9a103.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.9a103.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|