upgini 1.2.141__py3-none-any.whl → 1.2.142__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/dataset.py +8 -0
- upgini/features_enricher.py +502 -559
- upgini/metadata.py +2 -1
- upgini/normalizer/normalize_utils.py +1 -1
- upgini/resource_bundle/strings.properties +10 -9
- upgini/utils/datetime_utils.py +7 -4
- {upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/METADATA +1 -1
- {upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/RECORD +11 -11
- {upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/WHEEL +0 -0
- {upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/licenses/LICENSE +0 -0
upgini/metadata.py
CHANGED
|
@@ -96,7 +96,7 @@ class SearchKey(Enum):
|
|
|
96
96
|
return [SearchKey.EMAIL, SearchKey.HEM, SearchKey.IP, SearchKey.PHONE]
|
|
97
97
|
|
|
98
98
|
@staticmethod
|
|
99
|
-
def from_meaning_type(meaning_type: FileColumnMeaningType) -> "SearchKey":
|
|
99
|
+
def from_meaning_type(meaning_type: FileColumnMeaningType) -> Optional["SearchKey"]:
|
|
100
100
|
if meaning_type == FileColumnMeaningType.EMAIL:
|
|
101
101
|
return SearchKey.EMAIL
|
|
102
102
|
if meaning_type == FileColumnMeaningType.HEM:
|
|
@@ -263,6 +263,7 @@ class FileMetadata(BaseModel):
|
|
|
263
263
|
digest: Optional[str] = None
|
|
264
264
|
deterministicDigest: Optional[str] = None
|
|
265
265
|
droppedColumns: Optional[List[str]] = None
|
|
266
|
+
autodetectedSearchKeys: Optional[Dict[str, str]] = None
|
|
266
267
|
|
|
267
268
|
def column_by_name(self, name: str) -> Optional[FileColumnMetadata]:
|
|
268
269
|
for c in self.columns:
|
|
upgini/normalizer/normalize_utils.py
CHANGED
|
@@ -134,7 +134,7 @@ class Normalizer:
|
|
|
134
134
|
|
|
135
135
|
for f in features:
|
|
136
136
|
converter = DateTimeConverter(f)
|
|
137
|
-
if converter.is_datetime(df):
|
|
137
|
+
if converter.is_datetime(df) and f != DateTimeConverter.DATETIME_COL:
|
|
138
138
|
self.removed_datetime_features.append(f)
|
|
139
139
|
df.drop(columns=f, inplace=True)
|
|
140
140
|
|
|
upgini/resource_bundle/strings.properties
CHANGED
|
@@ -140,6 +140,7 @@ x_and_eval_x_diff_types=X and eval_set X has different types: {} and {}
|
|
|
140
140
|
eval_x_has_train_samples=Eval set X has rows that are present in train set X
|
|
141
141
|
oot_without_date_not_supported=Eval set {} provided as OOT but date column is missing. It will be ignored for stability check
|
|
142
142
|
oot_with_online_sources_not_supported=Eval set {} provided as OOT and also provided columns for online API. It will be ignored for stability check
|
|
143
|
+
autodetected_search_key_not_found=Autodetected on fit search key {} not found in X columns: {} for transform
|
|
143
144
|
|
|
144
145
|
baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input dataframe
|
|
145
146
|
baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
|
|
@@ -210,15 +211,15 @@ features_info_zero_important_features=Oops, we can't find any relevant external
|
|
|
210
211
|
features_info_zero_hit_rate_search_keys=Oops, looks like values/formats of the search keys {} might be incorrect,\nas we won't be able to match any data source using these values\nPlease check docs https://github.com/upgini/upgini#-search-key-types-we-support-more-to-come or send us a help request in Support:
|
|
211
212
|
features_not_generated=Following features didn't pass checks for automated feature generation: {}
|
|
212
213
|
# Information
|
|
213
|
-
datetime_detected=Datetime detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
214
|
-
postal_code_detected=Postal codes detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
215
|
-
country_detected=Countries detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
216
|
-
country_auto_determined=Search key country_code `{}` was automatically determined by client IP. \nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
217
|
-
country_default_determined=Search key country_code `{}` was used as default. \nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
218
|
-
email_detected=Emails detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
219
|
-
email_detected_not_registered=Emails detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
220
|
-
phone_detected=Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
221
|
-
phone_detected_not_registered=Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
|
|
214
|
+
datetime_detected=Datetime detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
215
|
+
postal_code_detected=Postal codes detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
216
|
+
country_detected=Countries detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
217
|
+
country_auto_determined=Search key country_code `{}` was automatically determined by client IP. \nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
218
|
+
country_default_determined=Search key country_code `{}` was used as default. \nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
219
|
+
email_detected=Emails detected in column `{}`. It will be used as a search key\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
220
|
+
email_detected_not_registered=Emails detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
221
|
+
phone_detected=Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
222
|
+
phone_detected_not_registered=Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns\n
|
|
222
223
|
target_type_detected=\nDetected task type: {}. Reason: {}\nYou can set task type manually with argument `model_task_type` of FeaturesEnricher constructor if task type detected incorrectly\n
|
|
223
224
|
binary_target_reason=only two unique label-values observed
|
|
224
225
|
non_numeric_multiclass_reason=non-numeric label values observed
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -282,10 +282,13 @@ class DateTimeConverter:
|
|
|
282
282
|
warnings.filterwarnings("ignore", message="Could not infer format")
|
|
283
283
|
return pd.to_datetime(df[self.date_column])
|
|
284
284
|
except ValueError:
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
285
|
+
try:
|
|
286
|
+
return pd.to_datetime(df[self.date_column], format="mixed", errors="raise")
|
|
287
|
+
except ValueError:
|
|
288
|
+
if raise_errors:
|
|
289
|
+
raise ValidationError(self.bundle.get("invalid_date_format").format(self.date_column))
|
|
290
|
+
else:
|
|
291
|
+
return None
|
|
289
292
|
|
|
290
293
|
def clean_old_dates(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
291
294
|
condition = df[self.date_column] <= self.MIN_SUPPORTED_DATE_TS
|
|
{upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/RECORD
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=J-UkV9cG8CRQHHY_zWq7GDwjkfngS30TGOqeyQgyN0Q,24
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=bkUpUC1sdhCQcLPysB7BC8WiFDPfjiJj1SztExpv0nA,33735
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=07decijgeSDaN6A1qVf71ZETu9nXVhNRWUTPWZeNpJo,234667
|
|
7
7
|
upgini/http.py,sha256=y26x4TQVYuEM3jz8JdASxSyBtvBemUkFf-FmX25sx-s,44356
|
|
8
|
-
upgini/metadata.py,sha256=
|
|
8
|
+
upgini/metadata.py,sha256=iYlL91g2PMHjiPIySIZb4IzIIUsPaAMIiV2It95GAjA,12866
|
|
9
9
|
upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
|
|
10
10
|
upgini/search_task.py,sha256=5mL_qV5mVtDkIumM9xCOgfa9Lc2B8mxJ1qI21iaScnQ,18656
|
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
@@ -35,10 +35,10 @@ upgini/data_source/data_source_publisher.py,sha256=CQi3fEukaStV-RiadSEvEFLThOlZJ
|
|
|
35
35
|
upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
|
|
36
36
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
|
37
37
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
upgini/normalizer/normalize_utils.py,sha256=
|
|
38
|
+
upgini/normalizer/normalize_utils.py,sha256=mnOFU39oOsv_UhJVArl8BSocZlNP0xzgeAbSYI14Zjs,8492
|
|
39
39
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
40
40
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
41
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
41
|
+
upgini/resource_bundle/strings.properties,sha256=GvOX9GZJaWxlL9X3FqtF3LyvPGVKyJniCzb5LVNiXwo,29710
|
|
42
42
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
43
43
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
upgini/sampler/base.py,sha256=Fva2FEhLiNRPZ9Q6uOtJRtRzwsayjv7aphalAZO_4lc,6452
|
|
@@ -52,7 +52,7 @@ upgini/utils/config.py,sha256=zFdnjchykfp_1Tm3Qep7phLzXBpXIOzr2tIuXchRBLw,1754
|
|
|
52
52
|
upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
|
|
53
53
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
54
54
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
55
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
55
|
+
upgini/utils/datetime_utils.py,sha256=rr_aUjzKxj9I-0EPyRnWCquXkb4hdh6YcEDtoxeB2XE,17783
|
|
56
56
|
upgini/utils/deduplicate_utils.py,sha256=CLX0QapRxB-ZVQT7yGvv1vSd2zac5SwRjCJavujdCps,11332
|
|
57
57
|
upgini/utils/display_utils.py,sha256=MoTqXZJvC6pAqgOaI3V0FG-IU_LnMfrn4TDcNvUqsdg,13316
|
|
58
58
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=CihpV6SC95HwtlMH60rGAUzVDa4Id0Bva8ySprmNHlE,
|
|
|
74
74
|
upgini/utils/track_info.py,sha256=NDKeQTUlZaYp15UoP-xLKGoDoJQ0drbDMwB0g9R0PUg,6427
|
|
75
75
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
76
76
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
77
|
-
upgini-1.2.
|
|
78
|
-
upgini-1.2.
|
|
79
|
-
upgini-1.2.
|
|
80
|
-
upgini-1.2.
|
|
77
|
+
upgini-1.2.142.dist-info/METADATA,sha256=iWbuZsbyF4hxG9iUnlF9tYH9JKx8FOBT34J5Bhs7cPo,51164
|
|
78
|
+
upgini-1.2.142.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
79
|
+
upgini-1.2.142.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
80
|
+
upgini-1.2.142.dist-info/RECORD,,
|
|
{upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/WHEEL
File without changes
|
|
{upgini-1.2.141.dist-info → upgini-1.2.142.dist-info}/licenses/LICENSE
File without changes
|