upgini 1.2.10__py3-none-any.whl → 1.2.11__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.10"
1
+ __version__ = "1.2.11"
@@ -1577,7 +1577,7 @@ class FeaturesEnricher(TransformerMixin):
1577
1577
  df = generator.generate(df)
1578
1578
  generated_features.extend(generator.generated_features)
1579
1579
 
1580
- normalizer = Normalizer(self.search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
1580
+ normalizer = Normalizer(search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
1581
1581
  df = normalizer.normalize(df)
1582
1582
  columns_renaming = normalizer.columns_renaming
1583
1583
 
@@ -2522,7 +2522,7 @@ class FeaturesEnricher(TransformerMixin):
2522
2522
  features_columns = [c for c in df.columns if c not in non_feature_columns]
2523
2523
 
2524
2524
  features_to_drop = FeaturesValidator(self.logger).validate(
2525
- df, features_columns, self.generate_features, self.warning_counter
2525
+ df, features_columns, self.generate_features, self.warning_counter, columns_renaming
2526
2526
  )
2527
2527
  self.fit_dropped_features.update(features_to_drop)
2528
2528
  df = df.drop(columns=features_to_drop)
@@ -190,7 +190,7 @@ ads_upload_too_few_rows=At least 1000 records per sample are needed. Increase th
190
190
  ads_upload_search_key_not_found=Search key {} wasn't found in dataframe columns
191
191
  ads_upload_to_many_empty_rows=More than 50% of rows in the submitted sample doesn't contain valid keys\nPlease fill the key columns with valid values and resubmit the data
192
192
  # Features info warning
193
- features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
193
+ features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats.\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
194
194
  features_info_zero_hit_rate_search_keys=Oops, looks like values/formats of the search keys {} might be incorrect,\nas we won't be able to match any data source using these values\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
195
195
  features_not_generated=\nWARNING: Following features didn't pass checks for automated feature generation: {}
196
196
 
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from logging import Logger
3
- from typing import List, Optional
3
+ from typing import Dict, List, Optional
4
4
 
5
5
  import pandas as pd
6
6
  from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
@@ -23,6 +23,7 @@ class FeaturesValidator:
23
23
  features: List[str],
24
24
  features_for_generate: Optional[List[str]],
25
25
  warning_counter: WarningCounter,
26
+ columns_renaming: Optional[Dict[str, str]] = None,
26
27
  ) -> List[str]:
27
28
  # one_hot_encoded_features = []
28
29
  empty_or_constant_features = []
@@ -56,7 +57,11 @@ class FeaturesValidator:
56
57
  # warning_counter.increment()
57
58
 
58
59
  if empty_or_constant_features:
59
- msg = bundle.get("empty_or_contant_features").format(empty_or_constant_features)
60
+ if columns_renaming:
61
+ display_names = [columns_renaming.get(f, f) for f in empty_or_constant_features]
62
+ else:
63
+ display_names = empty_or_constant_features
64
+ msg = bundle.get("empty_or_contant_features").format(display_names)
60
65
  print(msg)
61
66
  self.logger.warning(msg)
62
67
  warning_counter.increment()
@@ -65,7 +70,11 @@ class FeaturesValidator:
65
70
  if features_for_generate:
66
71
  high_cardinality_features = [f for f in high_cardinality_features if f not in features_for_generate]
67
72
  if high_cardinality_features:
68
- msg = bundle.get("high_cardinality_features").format(high_cardinality_features)
73
+ if columns_renaming:
74
+ display_names = [columns_renaming.get(f, f) for f in high_cardinality_features]
75
+ else:
76
+ display_names = empty_or_constant_features
77
+ msg = bundle.get("high_cardinality_features").format(display_names)
69
78
  print(msg)
70
79
  self.logger.warning(msg)
71
80
  warning_counter.increment()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.10
3
+ Version: 1.2.11
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=GI4rgymQsPWdk2_d96NgmZBRuFM6yOZB-kysnrjBjVo,23
1
+ upgini/__about__.py,sha256=aBWZsCYiXXcSUsUJr3tOTQWsH7ZDqJzyMYdQbOd5Qtc,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=NIroiDLvlWtfxE9qqYYoB6ZTLgpGweRaCVcL8osXoI8,187995
6
+ upgini/features_enricher.py,sha256=eRkI2qpV-IprB1dQAMxzto6I6Q3b3SBuDMVR1_OFlyA,188008
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=bHRPWCNrUvt2R9qMX6dZFCJ0i8ENVCQ2Rw3dHH9IJEg,7447
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=WZAuYPX2Dpn6BHoA3RX8uvMNMr-yJE2fF7Gz0i24x2s,26459
33
+ upgini/resource_bundle/strings.properties,sha256=faj0wJHppGTKCTbXW8KjqLuGyFNjgb5evEMeSrq_LCE,26460
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
47
47
  upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
48
48
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
- upgini/utils/features_validator.py,sha256=LIF6YMpHlxCrVz6mvMpc1kfNTIMVGlNCor7IJTmlSfI,3307
50
+ upgini/utils/features_validator.py,sha256=lf5Z-taTl98p7nAWQIyM0dUfkodbzjxv0mOSIZl1jRU,3760
51
51
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
52
52
  upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
53
53
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.10.dist-info/METADATA,sha256=Cc-4FefWQaLK1hlCIR_dMIAm_NHRD9HxquQbTZn986E,48577
61
- upgini-1.2.10.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.10.dist-info/RECORD,,
60
+ upgini-1.2.11.dist-info/METADATA,sha256=DKUOOrexxVQVXzyaD9sXsPyT8VYx2eys3oKt15nVGtI,48577
61
+ upgini-1.2.11.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.11.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.11.dist-info/RECORD,,