upgini 1.2.10__py3-none-any.whl → 1.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.10"
1
+ __version__ = "1.2.12"
@@ -1577,7 +1577,7 @@ class FeaturesEnricher(TransformerMixin):
1577
1577
  df = generator.generate(df)
1578
1578
  generated_features.extend(generator.generated_features)
1579
1579
 
1580
- normalizer = Normalizer(self.search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
1580
+ normalizer = Normalizer(search_keys, generated_features, self.bundle, self.logger, self.warning_counter)
1581
1581
  df = normalizer.normalize(df)
1582
1582
  columns_renaming = normalizer.columns_renaming
1583
1583
 
@@ -2522,7 +2522,7 @@ class FeaturesEnricher(TransformerMixin):
2522
2522
  features_columns = [c for c in df.columns if c not in non_feature_columns]
2523
2523
 
2524
2524
  features_to_drop = FeaturesValidator(self.logger).validate(
2525
- df, features_columns, self.generate_features, self.warning_counter
2525
+ df, features_columns, self.generate_features, self.warning_counter, columns_renaming
2526
2526
  )
2527
2527
  self.fit_dropped_features.update(features_to_drop)
2528
2528
  df = df.drop(columns=features_to_drop)
@@ -22,7 +22,7 @@ slack_community_bage=https://img.shields.io/badge/slack-@upgini-orange.svg?logo=
22
22
  slack_community_alt=Upgini Slack community
23
23
  version_warning=\nWARNING: Unsupported library version detected {},\nplease update with “%pip install -U upgini” to the latest {} and restart Jupyter kernel
24
24
  unregistered_with_personal_keys=\nWARNING: Search key {} can be used only with personal api_key from profile.upgini.com It will be ignored
25
- date_only_search=\nWARNING: Search started with DATE search key only\nTry to add other keys like the COUNTRY, POSTAL_CODE, PHONE NUMBER, EMAIL/HEM, IPv4 to your training dataset\nfor search through all the available data sources.\nSee docs https://github.com/upgini/upgini#-total-239-countries-and-up-to-41-years-of-history
25
+ date_only_search=\nWARNING: Search started with DATE search key only\nTry to add other keys like the COUNTRY, POSTAL_CODE, PHONE NUMBER, EMAIL/HEM, IP to your training dataset\nfor search through all the available data sources.\nSee docs https://github.com/upgini/upgini#-total-239-countries-and-up-to-41-years-of-history
26
26
  date_search_without_time_series=\nWARNING: Looks like your training dataset is a time series. We recommend to set `cv=CVType.time_series` param for correct search results.\nSee docs https://github.com/upgini/upgini#-time-series-prediction-support
27
27
  metrics_exclude_paid_features=\nWARNING: Metrics calculated after enrichment has a free features only. To calculate metrics with a full set of relevant features, including commercial data sources, please contact support team:
28
28
  metrics_no_important_free_features=\nWARNING: No important free features to calculate metrics
@@ -190,7 +190,7 @@ ads_upload_too_few_rows=At least 1000 records per sample are needed. Increase th
190
190
  ads_upload_search_key_not_found=Search key {} wasn't found in dataframe columns
191
191
  ads_upload_to_many_empty_rows=More than 50% of rows in the submitted sample doesn't contain valid keys\nPlease fill the key columns with valid values and resubmit the data
192
192
  # Features info warning
193
- features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
193
+ features_info_zero_important_features=Oops, we can't find any relevant external features for your training dataset,\nmost probably due to issues with search keys formats.\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
194
194
  features_info_zero_hit_rate_search_keys=Oops, looks like values/formats of the search keys {} might be incorrect,\nas we won't be able to match any data source using these values\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
195
195
  features_not_generated=\nWARNING: Following features didn't pass checks for automated feature generation: {}
196
196
 
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from logging import Logger
3
- from typing import List, Optional
3
+ from typing import Dict, List, Optional
4
4
 
5
5
  import pandas as pd
6
6
  from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
@@ -23,6 +23,7 @@ class FeaturesValidator:
23
23
  features: List[str],
24
24
  features_for_generate: Optional[List[str]],
25
25
  warning_counter: WarningCounter,
26
+ columns_renaming: Optional[Dict[str, str]] = None,
26
27
  ) -> List[str]:
27
28
  # one_hot_encoded_features = []
28
29
  empty_or_constant_features = []
@@ -55,17 +56,25 @@ class FeaturesValidator:
55
56
  # self.logger.warning(msg)
56
57
  # warning_counter.increment()
57
58
 
59
+ columns_renaming = columns_renaming or {}
60
+
58
61
  if empty_or_constant_features:
59
- msg = bundle.get("empty_or_contant_features").format(empty_or_constant_features)
62
+ msg = bundle.get("empty_or_contant_features").format(
63
+ [columns_renaming.get(f, f) for f in empty_or_constant_features]
64
+ )
60
65
  print(msg)
61
66
  self.logger.warning(msg)
62
67
  warning_counter.increment()
63
68
 
64
69
  high_cardinality_features = self.find_high_cardinality(df[features])
65
70
  if features_for_generate:
66
- high_cardinality_features = [f for f in high_cardinality_features if f not in features_for_generate]
71
+ high_cardinality_features = [
72
+ f for f in high_cardinality_features if columns_renaming.get(f, f) not in features_for_generate
73
+ ]
67
74
  if high_cardinality_features:
68
- msg = bundle.get("high_cardinality_features").format(high_cardinality_features)
75
+ msg = bundle.get("high_cardinality_features").format(
76
+ [columns_renaming.get(f, f) for f in high_cardinality_features]
77
+ )
69
78
  print(msg)
70
79
  self.logger.warning(msg)
71
80
  warning_counter.increment()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.10
3
+ Version: 1.2.12
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=GI4rgymQsPWdk2_d96NgmZBRuFM6yOZB-kysnrjBjVo,23
1
+ upgini/__about__.py,sha256=dbW85A2PinQCZabwD2DNDTfOE9315GDtQQKAsJP8IXk,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=NIroiDLvlWtfxE9qqYYoB6ZTLgpGweRaCVcL8osXoI8,187995
6
+ upgini/features_enricher.py,sha256=eRkI2qpV-IprB1dQAMxzto6I6Q3b3SBuDMVR1_OFlyA,188008
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=bHRPWCNrUvt2R9qMX6dZFCJ0i8ENVCQ2Rw3dHH9IJEg,7447
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=WZAuYPX2Dpn6BHoA3RX8uvMNMr-yJE2fF7Gz0i24x2s,26459
33
+ upgini/resource_bundle/strings.properties,sha256=hWldMqtv80lwv8HV00Hk2-3tflu4BkD6tiXOfGDZPl8,26458
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
47
47
  upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
48
48
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
- upgini/utils/features_validator.py,sha256=LIF6YMpHlxCrVz6mvMpc1kfNTIMVGlNCor7IJTmlSfI,3307
50
+ upgini/utils/features_validator.py,sha256=yiOdzVtpArELMufzAa9mtWq32lETB6sIF-w3Yvl3vV8,3614
51
51
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
52
52
  upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
53
53
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.10.dist-info/METADATA,sha256=Cc-4FefWQaLK1hlCIR_dMIAm_NHRD9HxquQbTZn986E,48577
61
- upgini-1.2.10.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.10.dist-info/RECORD,,
60
+ upgini-1.2.12.dist-info/METADATA,sha256=k_J1xVbmpvm56wJ_hDo17cEK6rXRhhqJp3rSbw233xA,48577
61
+ upgini-1.2.12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.12.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.12.dist-info/RECORD,,