upgini 1.2.10__py3-none-any.whl → 1.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +2 -2
- upgini/resource_bundle/strings.properties +2 -2
- upgini/utils/features_validator.py +13 -4
- {upgini-1.2.10.dist-info → upgini-1.2.12.dist-info}/METADATA +1 -1
- {upgini-1.2.10.dist-info → upgini-1.2.12.dist-info}/RECORD +8 -8
- {upgini-1.2.10.dist-info → upgini-1.2.12.dist-info}/WHEEL +0 -0
- {upgini-1.2.10.dist-info → upgini-1.2.12.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.10"
|
|
1
|
+
__version__ = "1.2.12"
|
upgini/features_enricher.py
CHANGED
|
@@ -1577,7 +1577,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1577
1577
|
df = generator.generate(df)
|
|
1578
1578
|
generated_features.extend(generator.generated_features)
|
|
1579
1579
|
|
|
1580
|
-
normalizer = Normalizer(
|
|
1580
|
+
normalizer = Normalizer(search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
|
|
1581
1581
|
df = normalizer.normalize(df)
|
|
1582
1582
|
columns_renaming = normalizer.columns_renaming
|
|
1583
1583
|
|
|
@@ -2522,7 +2522,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2522
2522
|
features_columns = [c for c in df.columns if c not in non_feature_columns]
|
|
2523
2523
|
|
|
2524
2524
|
features_to_drop = FeaturesValidator(self.logger).validate(
|
|
2525
|
-
df, features_columns, self.generate_features, self.warning_counter
|
|
2525
|
+
df, features_columns, self.generate_features, self.warning_counter, columns_renaming
|
|
2526
2526
|
)
|
|
2527
2527
|
self.fit_dropped_features.update(features_to_drop)
|
|
2528
2528
|
df = df.drop(columns=features_to_drop)
|
|
@@ -22,7 +22,7 @@ slack_community_bage=https://img.shields.io/badge/slack-@upgini-orange.svg?logo=
|
|
|
22
22
|
slack_community_alt=Upgini Slack community
|
|
23
23
|
version_warning=\nWARNING: Unsupported library version detected {},\nplease update with “%pip install -U upgini” to the latest {} and restart Jupyter kernel
|
|
24
24
|
unregistered_with_personal_keys=\nWARNING: Search key {} can be used only with personal api_key from profile.upgini.com It will be ignored
|
|
25
|
-
date_only_search=\nWARNING: Search started with DATE search key only\nTry to add other keys like the COUNTRY, POSTAL_CODE, PHONE NUMBER, EMAIL/HEM,
|
|
25
|
+
date_only_search=\nWARNING: Search started with DATE search key only\nTry to add other keys like the COUNTRY, POSTAL_CODE, PHONE NUMBER, EMAIL/HEM, IP to your training dataset\nfor search through all the available data sources.\nSee docs https://github.com/upgini/upgini#-total-239-countries-and-up-to-41-years-of-history
|
|
26
26
|
date_search_without_time_series=\nWARNING: Looks like your training dataset is a time series. We recommend to set `cv=CVType.time_series` param for correct search results.\nSee docs https://github.com/upgini/upgini#-time-series-prediction-support
|
|
27
27
|
metrics_exclude_paid_features=\nWARNING: Metrics calculated after enrichment has a free features only. To calculate metrics with a full set of relevant features, including commercial data sources, please contact support team:
|
|
28
28
|
metrics_no_important_free_features=\nWARNING: No important free features to calculate metrics
|
|
@@ -190,7 +190,7 @@ ads_upload_too_few_rows=At least 1000 records per sample are needed. Increase th
|
|
|
190
190
|
ads_upload_search_key_not_found=Search key {} wasn't found in dataframe columns
|
|
191
191
|
ads_upload_to_many_empty_rows=More than 50% of rows in the submitted sample doesn't contain valid keys\nPlease fill the key columns with valid values and resubmit the data
|
|
192
192
|
# Features info warning
|
|
193
|
-
features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats
|
|
193
|
+
features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats.\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
|
|
194
194
|
features_info_zero_hit_rate_search_keys=Oops, looks like values/formats of the search keys {} might be incorrect,\nas we won't be able to match any data source using these values\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
|
|
195
195
|
features_not_generated=\nWARNING: Following features didn't pass checks for automated feature generation: {}
|
|
196
196
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from logging import Logger
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
|
|
@@ -23,6 +23,7 @@ class FeaturesValidator:
|
|
|
23
23
|
features: List[str],
|
|
24
24
|
features_for_generate: Optional[List[str]],
|
|
25
25
|
warning_counter: WarningCounter,
|
|
26
|
+
columns_renaming: Optional[Dict[str, str]] = None,
|
|
26
27
|
) -> List[str]:
|
|
27
28
|
# one_hot_encoded_features = []
|
|
28
29
|
empty_or_constant_features = []
|
|
@@ -55,17 +56,25 @@ class FeaturesValidator:
|
|
|
55
56
|
# self.logger.warning(msg)
|
|
56
57
|
# warning_counter.increment()
|
|
57
58
|
|
|
59
|
+
columns_renaming = columns_renaming or {}
|
|
60
|
+
|
|
58
61
|
if empty_or_constant_features:
|
|
59
|
-
msg = bundle.get("empty_or_contant_features").format(
|
|
62
|
+
msg = bundle.get("empty_or_contant_features").format(
|
|
63
|
+
[columns_renaming.get(f, f) for f in empty_or_constant_features]
|
|
64
|
+
)
|
|
60
65
|
print(msg)
|
|
61
66
|
self.logger.warning(msg)
|
|
62
67
|
warning_counter.increment()
|
|
63
68
|
|
|
64
69
|
high_cardinality_features = self.find_high_cardinality(df[features])
|
|
65
70
|
if features_for_generate:
|
|
66
|
-
high_cardinality_features = [
|
|
71
|
+
high_cardinality_features = [
|
|
72
|
+
f for f in high_cardinality_features if columns_renaming.get(f, f) not in features_for_generate
|
|
73
|
+
]
|
|
67
74
|
if high_cardinality_features:
|
|
68
|
-
msg = bundle.get("high_cardinality_features").format(
|
|
75
|
+
msg = bundle.get("high_cardinality_features").format(
|
|
76
|
+
[columns_renaming.get(f, f) for f in high_cardinality_features]
|
|
77
|
+
)
|
|
69
78
|
print(msg)
|
|
70
79
|
self.logger.warning(msg)
|
|
71
80
|
warning_counter.increment()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=dbW85A2PinQCZabwD2DNDTfOE9315GDtQQKAsJP8IXk,23
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=eRkI2qpV-IprB1dQAMxzto6I6Q3b3SBuDMVR1_OFlyA,188008
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
30
30
|
upgini/normalizer/normalize_utils.py,sha256=bHRPWCNrUvt2R9qMX6dZFCJ0i8ENVCQ2Rw3dHH9IJEg,7447
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
33
|
+
upgini/resource_bundle/strings.properties,sha256=hWldMqtv80lwv8HV00Hk2-3tflu4BkD6tiXOfGDZPl8,26458
|
|
34
34
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
35
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
|
|
|
47
47
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
48
48
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
49
49
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
50
|
-
upgini/utils/features_validator.py,sha256=
|
|
50
|
+
upgini/utils/features_validator.py,sha256=yiOdzVtpArELMufzAa9mtWq32lETB6sIF-w3Yvl3vV8,3614
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
52
|
upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
|
|
53
53
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
60
|
+
upgini-1.2.12.dist-info/METADATA,sha256=k_J1xVbmpvm56wJ_hDo17cEK6rXRhhqJp3rSbw233xA,48577
|
|
61
|
+
upgini-1.2.12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.12.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|