upgini 1.1.312a4__py3-none-any.whl → 1.1.312a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +12 -7
- {upgini-1.1.312a4.dist-info → upgini-1.1.312a5.dist-info}/METADATA +1 -1
- {upgini-1.1.312a4.dist-info → upgini-1.1.312a5.dist-info}/RECORD +6 -6
- {upgini-1.1.312a4.dist-info → upgini-1.1.312a5.dist-info}/WHEEL +0 -0
- {upgini-1.1.312a4.dist-info → upgini-1.1.312a5.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.312a4"
|
|
1
|
+
__version__ = "1.1.312a5"
|
upgini/features_enricher.py
CHANGED
|
@@ -91,7 +91,11 @@ from upgini.utils.display_utils import (
|
|
|
91
91
|
prepare_and_show_report,
|
|
92
92
|
show_request_quote_button,
|
|
93
93
|
)
|
|
94
|
-
from upgini.utils.email_utils import
|
|
94
|
+
from upgini.utils.email_utils import (
|
|
95
|
+
EmailDomainGenerator,
|
|
96
|
+
EmailSearchKeyConverter,
|
|
97
|
+
EmailSearchKeyDetector,
|
|
98
|
+
)
|
|
95
99
|
from upgini.utils.features_validator import FeaturesValidator
|
|
96
100
|
from upgini.utils.format import Format
|
|
97
101
|
from upgini.utils.ip_utils import IpSearchKeyConverter
|
|
@@ -1030,7 +1034,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1030
1034
|
self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
|
|
1031
1035
|
}
|
|
1032
1036
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
|
1033
|
-
|
|
1037
|
+
effective_y
|
|
1034
1038
|
):
|
|
1035
1039
|
train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
|
1036
1040
|
np.mean(effective_y), 4
|
|
@@ -1103,7 +1107,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1103
1107
|
# self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
|
|
1104
1108
|
}
|
|
1105
1109
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
|
1106
|
-
|
|
1110
|
+
effective_eval_set[idx][1]
|
|
1107
1111
|
):
|
|
1108
1112
|
eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
|
1109
1113
|
np.mean(effective_eval_set[idx][1]), 4
|
|
@@ -1363,6 +1367,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1363
1367
|
importance_threshold,
|
|
1364
1368
|
max_features,
|
|
1365
1369
|
)
|
|
1370
|
+
filtered_enriched_features = [c for c in filtered_enriched_features if c not in client_features]
|
|
1366
1371
|
|
|
1367
1372
|
X_sorted, y_sorted = self._sort_by_system_record_id(X_sampled, y_sampled, self.cv)
|
|
1368
1373
|
enriched_X_sorted, enriched_y_sorted = self._sort_by_system_record_id(enriched_X, y_sampled, self.cv)
|
|
@@ -2217,7 +2222,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2217
2222
|
result = enrich()
|
|
2218
2223
|
|
|
2219
2224
|
filtered_columns = self.__filtered_enriched_features(importance_threshold, max_features)
|
|
2220
|
-
existing_filtered_columns = [
|
|
2225
|
+
existing_filtered_columns = [
|
|
2226
|
+
c for c in filtered_columns if c in result.columns and c not in validated_X.columns
|
|
2227
|
+
]
|
|
2221
2228
|
selecting_columns = validated_X.columns.tolist() + generated_features + existing_filtered_columns
|
|
2222
2229
|
if add_fit_system_record_id:
|
|
2223
2230
|
selecting_columns.append(SORT_ID)
|
|
@@ -2430,9 +2437,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2430
2437
|
|
|
2431
2438
|
email_columns = SearchKey.find_all_keys(self.fit_search_keys, SearchKey.EMAIL)
|
|
2432
2439
|
if email_columns:
|
|
2433
|
-
generator = EmailDomainGenerator(
|
|
2434
|
-
email_columns
|
|
2435
|
-
)
|
|
2440
|
+
generator = EmailDomainGenerator(email_columns)
|
|
2436
2441
|
df = generator.generate(df)
|
|
2437
2442
|
self.fit_generated_features.extend(generator.generated_features)
|
|
2438
2443
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=AYWzaD5lVHd4s3vopg53TBKzkKgWzJVi62VrmfO7rRU,26
|
|
2
2
|
upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=c6jghh32P9_2CspELYCOsmNIOiShuCADnCCJ8Jj2t50,30834
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=LqGOMObkFsAm58sBL3UhTmc7TOnDQmLivxl3jbXh-n0,187132
|
|
7
7
|
upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
|
|
8
8
|
upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
|
|
9
9
|
upgini/metadata.py,sha256=YQ-1HZGyPOksP2iM50ff_pMHXLyzvpChqSfNh8Z0ke4,10833
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.312a4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
-
upgini-1.1.312a4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
-
upgini-1.1.312a4.dist-info/RECORD,,
|
|
60
|
+
upgini-1.1.312a5.dist-info/METADATA,sha256=MdhSWCWMCuajyC7B1bVWfVFOs7b5iIsmD7m3Z48egng,48155
|
|
61
|
+
upgini-1.1.312a5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.1.312a5.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.312a5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|