upgini 1.2.10__py3-none-any.whl → 1.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +2 -2
- upgini/resource_bundle/strings.properties +1 -1
- upgini/utils/features_validator.py +12 -3
- {upgini-1.2.10.dist-info → upgini-1.2.11.dist-info}/METADATA +1 -1
- {upgini-1.2.10.dist-info → upgini-1.2.11.dist-info}/RECORD +8 -8
- {upgini-1.2.10.dist-info → upgini-1.2.11.dist-info}/WHEEL +0 -0
- {upgini-1.2.10.dist-info → upgini-1.2.11.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.10"
|
|
1
|
+
__version__ = "1.2.11"
|
upgini/features_enricher.py
CHANGED
|
@@ -1577,7 +1577,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1577
1577
|
df = generator.generate(df)
|
|
1578
1578
|
generated_features.extend(generator.generated_features)
|
|
1579
1579
|
|
|
1580
|
-
normalizer = Normalizer(
|
|
1580
|
+
normalizer = Normalizer(search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
|
|
1581
1581
|
df = normalizer.normalize(df)
|
|
1582
1582
|
columns_renaming = normalizer.columns_renaming
|
|
1583
1583
|
|
|
@@ -2522,7 +2522,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2522
2522
|
features_columns = [c for c in df.columns if c not in non_feature_columns]
|
|
2523
2523
|
|
|
2524
2524
|
features_to_drop = FeaturesValidator(self.logger).validate(
|
|
2525
|
-
df, features_columns, self.generate_features, self.warning_counter
|
|
2525
|
+
df, features_columns, self.generate_features, self.warning_counter, columns_renaming
|
|
2526
2526
|
)
|
|
2527
2527
|
self.fit_dropped_features.update(features_to_drop)
|
|
2528
2528
|
df = df.drop(columns=features_to_drop)
|
|
@@ -190,7 +190,7 @@ ads_upload_too_few_rows=At least 1000 records per sample are needed. Increase th
|
|
|
190
190
|
ads_upload_search_key_not_found=Search key {} wasn't found in dataframe columns
|
|
191
191
|
ads_upload_to_many_empty_rows=More than 50% of rows in the submitted sample doesn't contain valid keys\nPlease fill the key columns with valid values and resubmit the data
|
|
192
192
|
# Features info warning
|
|
193
|
-
features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats
|
|
193
|
+
features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats.\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
|
|
194
194
|
features_info_zero_hit_rate_search_keys=Oops, looks like values/formats of the search keys {} might be incorrect,\nas we won't be able to match any data source using these values\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
|
|
195
195
|
features_not_generated=\nWARNING: Following features didn't pass checks for automated feature generation: {}
|
|
196
196
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from logging import Logger
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
|
|
@@ -23,6 +23,7 @@ class FeaturesValidator:
|
|
|
23
23
|
features: List[str],
|
|
24
24
|
features_for_generate: Optional[List[str]],
|
|
25
25
|
warning_counter: WarningCounter,
|
|
26
|
+
columns_renaming: Optional[Dict[str, str]] = None,
|
|
26
27
|
) -> List[str]:
|
|
27
28
|
# one_hot_encoded_features = []
|
|
28
29
|
empty_or_constant_features = []
|
|
@@ -56,7 +57,11 @@ class FeaturesValidator:
|
|
|
56
57
|
# warning_counter.increment()
|
|
57
58
|
|
|
58
59
|
if empty_or_constant_features:
|
|
59
|
-
|
|
60
|
+
if columns_renaming:
|
|
61
|
+
display_names = [columns_renaming.get(f, f) for f in empty_or_constant_features]
|
|
62
|
+
else:
|
|
63
|
+
display_names = empty_or_constant_features
|
|
64
|
+
msg = bundle.get("empty_or_contant_features").format(display_names)
|
|
60
65
|
print(msg)
|
|
61
66
|
self.logger.warning(msg)
|
|
62
67
|
warning_counter.increment()
|
|
@@ -65,7 +70,11 @@ class FeaturesValidator:
|
|
|
65
70
|
if features_for_generate:
|
|
66
71
|
high_cardinality_features = [f for f in high_cardinality_features if f not in features_for_generate]
|
|
67
72
|
if high_cardinality_features:
|
|
68
|
-
|
|
73
|
+
if columns_renaming:
|
|
74
|
+
display_names = [columns_renaming.get(f, f) for f in high_cardinality_features]
|
|
75
|
+
else:
|
|
76
|
+
display_names = empty_or_constant_features
|
|
77
|
+
msg = bundle.get("high_cardinality_features").format(display_names)
|
|
69
78
|
print(msg)
|
|
70
79
|
self.logger.warning(msg)
|
|
71
80
|
warning_counter.increment()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=aBWZsCYiXXcSUsUJr3tOTQWsH7ZDqJzyMYdQbOd5Qtc,23
|
|
2
2
|
upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=eRkI2qpV-IprB1dQAMxzto6I6Q3b3SBuDMVR1_OFlyA,188008
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
30
30
|
upgini/normalizer/normalize_utils.py,sha256=bHRPWCNrUvt2R9qMX6dZFCJ0i8ENVCQ2Rw3dHH9IJEg,7447
|
|
31
31
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
32
32
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
33
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
33
|
+
upgini/resource_bundle/strings.properties,sha256=faj0wJHppGTKCTbXW8KjqLuGyFNjgb5evEMeSrq_LCE,26460
|
|
34
34
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
35
35
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
|
|
|
47
47
|
upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
|
|
48
48
|
upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
|
|
49
49
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
50
|
-
upgini/utils/features_validator.py,sha256=
|
|
50
|
+
upgini/utils/features_validator.py,sha256=lf5Z-taTl98p7nAWQIyM0dUfkodbzjxv0mOSIZl1jRU,3760
|
|
51
51
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
52
52
|
upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
|
|
53
53
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.2.
|
|
61
|
-
upgini-1.2.10.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
-
upgini-1.2.10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
-
upgini-1.2.10.dist-info/RECORD,,
|
|
60
|
+
upgini-1.2.11.dist-info/METADATA,sha256=DKUOOrexxVQVXzyaD9sXsPyT8VYx2eys3oKt15nVGtI,48577
|
|
61
|
+
upgini-1.2.11.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.2.11.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.2.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|