upgini 1.2.29a5__py3-none-any.whl → 1.2.29a6__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +26 -14
- {upgini-1.2.29a5.dist-info → upgini-1.2.29a6.dist-info}/METADATA +1 -1
- {upgini-1.2.29a5.dist-info → upgini-1.2.29a6.dist-info}/RECORD +6 -6
- {upgini-1.2.29a5.dist-info → upgini-1.2.29a6.dist-info}/WHEEL +0 -0
- {upgini-1.2.29a5.dist-info → upgini-1.2.29a6.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.29a5"
|
|
1
|
+
__version__ = "1.2.29a6"
|
upgini/features_enricher.py
CHANGED
|
@@ -2,6 +2,7 @@ import dataclasses
|
|
|
2
2
|
import datetime
|
|
3
3
|
import gc
|
|
4
4
|
import hashlib
|
|
5
|
+
import itertools
|
|
5
6
|
import logging
|
|
6
7
|
import numbers
|
|
7
8
|
import os
|
|
@@ -159,6 +160,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
159
160
|
|
|
160
161
|
shared_datasets: list of str, optional (default=None)
|
|
161
162
|
List of private shared dataset ids for custom search
|
|
163
|
+
|
|
164
|
+
select_features: bool, optional (default=False)
|
|
165
|
+
If True, return only selected features both from input and data sources.
|
|
166
|
+
Otherwise, return all features from input and only selected features from data sources.
|
|
162
167
|
"""
|
|
163
168
|
|
|
164
169
|
TARGET_NAME = "target"
|
|
@@ -279,7 +284,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
279
284
|
self._relevant_data_sources_wo_links: pd.DataFrame = self.EMPTY_DATA_SOURCES
|
|
280
285
|
self.metrics: Optional[pd.DataFrame] = None
|
|
281
286
|
self.feature_names_ = []
|
|
282
|
-
self.
|
|
287
|
+
self.dropped_client_feature_names_ = []
|
|
283
288
|
self.feature_importances_ = []
|
|
284
289
|
self.search_id = search_id
|
|
285
290
|
self.select_features = select_features
|
|
@@ -2071,7 +2076,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2071
2076
|
is_demo_dataset = hash_input(validated_X) in DEMO_DATASET_HASHES
|
|
2072
2077
|
|
|
2073
2078
|
columns_to_drop = [
|
|
2074
|
-
c for c in validated_X.columns if c in self.feature_names_ and c
|
|
2079
|
+
c for c in validated_X.columns if c in self.feature_names_ and c in self.dropped_client_feature_names_
|
|
2075
2080
|
]
|
|
2076
2081
|
if len(columns_to_drop) > 0:
|
|
2077
2082
|
msg = self.bundle.get("x_contains_enriching_columns").format(columns_to_drop)
|
|
@@ -2328,11 +2333,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2328
2333
|
else:
|
|
2329
2334
|
result = enrich()
|
|
2330
2335
|
|
|
2331
|
-
|
|
2332
|
-
|
|
2333
|
-
|
|
2336
|
+
selecting_columns = [
|
|
2337
|
+
c
|
|
2338
|
+
for c in itertools.chain(validated_X.columns.tolist(), generated_features)
|
|
2339
|
+
if c not in self.dropped_client_feature_names_
|
|
2334
2340
|
]
|
|
2335
|
-
|
|
2341
|
+
filtered_columns = self.__filtered_enriched_features(importance_threshold, max_features)
|
|
2342
|
+
selecting_columns.extend(c for c in filtered_columns if c in result.columns and c not in validated_X.columns)
|
|
2336
2343
|
if add_fit_system_record_id:
|
|
2337
2344
|
selecting_columns.append(SORT_ID)
|
|
2338
2345
|
|
|
@@ -3510,7 +3517,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3510
3517
|
features_df = self._search_task.get_all_initial_raw_features(trace_id, metrics_calculation=True)
|
|
3511
3518
|
|
|
3512
3519
|
self.feature_names_ = []
|
|
3513
|
-
self.
|
|
3520
|
+
self.dropped_client_feature_names_ = []
|
|
3514
3521
|
self.feature_importances_ = []
|
|
3515
3522
|
features_info = []
|
|
3516
3523
|
features_info_without_links = []
|
|
@@ -3520,18 +3527,23 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3520
3527
|
for feature_meta in features_meta:
|
|
3521
3528
|
if feature_meta.name in original_names_dict.keys():
|
|
3522
3529
|
feature_meta.name = original_names_dict[feature_meta.name]
|
|
3530
|
+
|
|
3531
|
+
is_client_feature = feature_meta.name in x_columns
|
|
3532
|
+
|
|
3533
|
+
if feature_meta.shap_value == 0.0:
|
|
3534
|
+
if self.select_features:
|
|
3535
|
+
self.dropped_client_feature_names_.append(feature_meta.name)
|
|
3536
|
+
continue
|
|
3537
|
+
|
|
3523
3538
|
# Use only important features
|
|
3524
3539
|
if (
|
|
3525
|
-
|
|
3526
|
-
or
|
|
3527
|
-
|
|
3540
|
+
feature_meta.name in self.fit_generated_features
|
|
3541
|
+
or feature_meta.name == COUNTRY
|
|
3542
|
+
# In select_features mode we select also from etalon features and need to show them
|
|
3543
|
+
or (not self.select_features and is_client_feature)
|
|
3528
3544
|
):
|
|
3529
3545
|
continue
|
|
3530
3546
|
|
|
3531
|
-
is_client_feature = feature_meta.name in x_columns
|
|
3532
|
-
# In select_features mode we select also from etalon features and need to show them
|
|
3533
|
-
if not self.select_features and is_client_feature:
|
|
3534
|
-
continue
|
|
3535
3547
|
|
|
3536
3548
|
self.feature_names_.append(feature_meta.name)
|
|
3537
3549
|
self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=IwPdYvZC3KafuIyZFkN_uViBDHIV_KryoYm_uF-6Z5k,25
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=HAu6ZZSCW5BJ83fxuGjuEy2h283EO1sr3j_eUcVldsY,190873
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
62
|
+
upgini-1.2.29a6.dist-info/METADATA,sha256=7ZvyeiohsWXQQ1j_7N2H6yVKKhe4pUEpRORxtFBGcH8,48580
|
|
63
|
+
upgini-1.2.29a6.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
64
|
+
upgini-1.2.29a6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.29a6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|