upgini 1.1.278a2__py3-none-any.whl → 1.1.279a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/dataset.py +2 -11
- upgini/features_enricher.py +100 -213
- upgini/fingerprint.js +8 -0
- upgini/metadata.py +2 -10
- upgini/metrics.py +1 -1
- upgini/resource_bundle/strings.properties +0 -1
- upgini/utils/base_search_key_detector.py +12 -14
- upgini/utils/datetime_utils.py +9 -10
- upgini/utils/deduplicate_utils.py +1 -11
- upgini/utils/email_utils.py +0 -5
- {upgini-1.1.278a2.dist-info → upgini-1.1.279a1.dist-info}/METADATA +1 -1
- {upgini-1.1.278a2.dist-info → upgini-1.1.279a1.dist-info}/RECORD +15 -14
- {upgini-1.1.278a2.dist-info → upgini-1.1.279a1.dist-info}/LICENSE +0 -0
- {upgini-1.1.278a2.dist-info → upgini-1.1.279a1.dist-info}/WHEEL +0 -0
- {upgini-1.1.278a2.dist-info → upgini-1.1.279a1.dist-info}/top_level.txt +0 -0
upgini/dataset.py
CHANGED
|
@@ -23,9 +23,7 @@ from pandas.api.types import (
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
24
24
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
25
25
|
from upgini.metadata import (
|
|
26
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
27
26
|
EVAL_SET_INDEX,
|
|
28
|
-
SEARCH_KEY_UNNEST,
|
|
29
27
|
SYSTEM_COLUMNS,
|
|
30
28
|
SYSTEM_RECORD_ID,
|
|
31
29
|
TARGET,
|
|
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
81
79
|
path: Optional[str] = None,
|
|
82
80
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
83
81
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
84
|
-
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
85
82
|
model_task_type: Optional[ModelTaskType] = None,
|
|
86
83
|
random_state: Optional[int] = None,
|
|
87
84
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
116
113
|
self.description = description
|
|
117
114
|
self.meaning_types = meaning_types
|
|
118
115
|
self.search_keys = search_keys
|
|
119
|
-
self.unnest_search_keys = unnest_search_keys
|
|
120
116
|
self.ignore_columns = []
|
|
121
117
|
self.hierarchical_group_keys = []
|
|
122
118
|
self.hierarchical_subgroup_keys = []
|
|
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
176
172
|
new_columns = []
|
|
177
173
|
dup_counter = 0
|
|
178
174
|
for column in self.data.columns:
|
|
179
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID
|
|
175
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
180
176
|
self.columns_renaming[column] = column
|
|
181
177
|
new_columns.append(column)
|
|
182
178
|
continue
|
|
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
357
353
|
|
|
358
354
|
if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
|
|
359
355
|
try:
|
|
360
|
-
self.data[postal_code] = (
|
|
361
|
-
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
362
|
-
)
|
|
356
|
+
self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
|
|
363
357
|
except Exception:
|
|
364
358
|
pass
|
|
365
359
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
809
803
|
meaningType=meaning_type,
|
|
810
804
|
minMaxValues=min_max_values,
|
|
811
805
|
)
|
|
812
|
-
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
813
|
-
column_meta.isUnnest = True
|
|
814
|
-
column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
|
|
815
806
|
|
|
816
807
|
columns.append(column_meta)
|
|
817
808
|
|
upgini/features_enricher.py
CHANGED
|
@@ -11,7 +11,6 @@ import sys
|
|
|
11
11
|
import tempfile
|
|
12
12
|
import time
|
|
13
13
|
import uuid
|
|
14
|
-
from collections import Counter
|
|
15
14
|
from dataclasses import dataclass
|
|
16
15
|
from threading import Thread
|
|
17
16
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
|
@@ -46,11 +45,9 @@ from upgini.mdc import MDC
|
|
|
46
45
|
from upgini.metadata import (
|
|
47
46
|
COUNTRY,
|
|
48
47
|
DEFAULT_INDEX,
|
|
49
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
50
48
|
EVAL_SET_INDEX,
|
|
51
49
|
ORIGINAL_INDEX,
|
|
52
50
|
RENAMED_INDEX,
|
|
53
|
-
SEARCH_KEY_UNNEST,
|
|
54
51
|
SORT_ID,
|
|
55
52
|
SYSTEM_RECORD_ID,
|
|
56
53
|
TARGET,
|
|
@@ -251,7 +248,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
251
248
|
self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
|
|
252
249
|
|
|
253
250
|
validate_version(self.logger)
|
|
254
|
-
self.search_keys = search_keys or
|
|
251
|
+
self.search_keys = search_keys or dict()
|
|
255
252
|
self.country_code = country_code
|
|
256
253
|
self.__validate_search_keys(search_keys, search_id)
|
|
257
254
|
self.model_task_type = model_task_type
|
|
@@ -1191,7 +1188,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1191
1188
|
email_column = self._get_email_column(search_keys)
|
|
1192
1189
|
hem_column = self._get_hem_column(search_keys)
|
|
1193
1190
|
if email_column:
|
|
1194
|
-
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys,
|
|
1191
|
+
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
1195
1192
|
extended_X = converter.convert(extended_X)
|
|
1196
1193
|
generated_features.extend(converter.generated_features)
|
|
1197
1194
|
if (
|
|
@@ -1343,7 +1340,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1343
1340
|
not in (
|
|
1344
1341
|
excluding_search_keys
|
|
1345
1342
|
+ list(self.fit_dropped_features)
|
|
1346
|
-
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID
|
|
1343
|
+
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID]
|
|
1347
1344
|
)
|
|
1348
1345
|
]
|
|
1349
1346
|
|
|
@@ -1407,7 +1404,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1407
1404
|
fitting_enriched_X[col].astype("string").str.replace(",", ".").astype(np.float64)
|
|
1408
1405
|
)
|
|
1409
1406
|
|
|
1410
|
-
fitting_eval_set_dict =
|
|
1407
|
+
fitting_eval_set_dict = dict()
|
|
1411
1408
|
for idx, eval_tuple in eval_set_sampled_dict.items():
|
|
1412
1409
|
eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
|
|
1413
1410
|
eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
|
|
@@ -1519,7 +1516,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1519
1516
|
def __sample_only_input(
|
|
1520
1517
|
self, validated_X: pd.DataFrame, validated_y: pd.Series, eval_set: Optional[List[tuple]], is_demo_dataset: bool
|
|
1521
1518
|
) -> _SampledDataForMetrics:
|
|
1522
|
-
eval_set_sampled_dict =
|
|
1519
|
+
eval_set_sampled_dict = dict()
|
|
1523
1520
|
|
|
1524
1521
|
df = validated_X.copy()
|
|
1525
1522
|
df[TARGET] = validated_y
|
|
@@ -1545,7 +1542,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1545
1542
|
df = df.sample(n=sample_rows, random_state=self.random_state)
|
|
1546
1543
|
|
|
1547
1544
|
df_extended, search_keys = self._extend_x(df, is_demo_dataset)
|
|
1548
|
-
df_extended = self.__add_fit_system_record_id(df_extended,
|
|
1545
|
+
df_extended = self.__add_fit_system_record_id(df_extended, dict(), search_keys)
|
|
1549
1546
|
|
|
1550
1547
|
train_df = df_extended.query(f"{EVAL_SET_INDEX} == 0") if eval_set is not None else df_extended
|
|
1551
1548
|
X_sampled = train_df.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
|
|
@@ -1569,7 +1566,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1569
1566
|
trace_id: str,
|
|
1570
1567
|
remove_outliers_calc_metrics: Optional[bool],
|
|
1571
1568
|
) -> _SampledDataForMetrics:
|
|
1572
|
-
eval_set_sampled_dict =
|
|
1569
|
+
eval_set_sampled_dict = dict()
|
|
1573
1570
|
search_keys = self.fit_search_keys
|
|
1574
1571
|
|
|
1575
1572
|
rows_to_drop = None
|
|
@@ -1643,7 +1640,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1643
1640
|
progress_bar: Optional[ProgressBar],
|
|
1644
1641
|
progress_callback: Optional[Callable[[SearchProgress], Any]],
|
|
1645
1642
|
) -> _SampledDataForMetrics:
|
|
1646
|
-
eval_set_sampled_dict =
|
|
1643
|
+
eval_set_sampled_dict = dict()
|
|
1647
1644
|
if eval_set is not None:
|
|
1648
1645
|
self.logger.info("Transform with eval_set")
|
|
1649
1646
|
# concatenate X and eval_set with eval_set_index
|
|
@@ -1665,7 +1662,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1665
1662
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS}")
|
|
1666
1663
|
df = df.sample(n=Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS, random_state=self.random_state)
|
|
1667
1664
|
|
|
1668
|
-
eval_set_sampled_dict =
|
|
1665
|
+
eval_set_sampled_dict = dict()
|
|
1669
1666
|
|
|
1670
1667
|
tmp_target_name = "__target"
|
|
1671
1668
|
df = df.rename(columns={TARGET: tmp_target_name})
|
|
@@ -1928,38 +1925,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1928
1925
|
self.logger.info("Input dataset hasn't date column")
|
|
1929
1926
|
if self.add_date_if_missing:
|
|
1930
1927
|
df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
|
|
1931
|
-
|
|
1932
|
-
# Don't pass all features in backend on transform
|
|
1933
|
-
original_features_for_transform = []
|
|
1934
|
-
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1935
|
-
features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
|
|
1936
|
-
if len(features_not_to_pass) > 0:
|
|
1937
|
-
# Pass only features that need for transform
|
|
1938
|
-
features_for_transform = self._search_task.get_features_for_transform()
|
|
1939
|
-
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1940
|
-
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1941
|
-
original_features_for_transform = [
|
|
1942
|
-
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1943
|
-
]
|
|
1944
|
-
|
|
1945
|
-
runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
|
|
1946
|
-
|
|
1947
|
-
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1948
|
-
|
|
1949
|
-
df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
|
|
1950
|
-
df[columns_for_system_record_id], index=False
|
|
1951
|
-
).astype("Float64")
|
|
1952
|
-
|
|
1953
|
-
# Explode multiple search keys
|
|
1954
|
-
df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
|
|
1955
|
-
|
|
1956
1928
|
email_column = self._get_email_column(search_keys)
|
|
1957
1929
|
hem_column = self._get_hem_column(search_keys)
|
|
1958
1930
|
email_converted_to_hem = False
|
|
1959
1931
|
if email_column:
|
|
1960
|
-
converter = EmailSearchKeyConverter(
|
|
1961
|
-
email_column, hem_column, search_keys, list(unnest_search_keys.keys()), self.logger
|
|
1962
|
-
)
|
|
1932
|
+
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
1963
1933
|
df = converter.convert(df)
|
|
1964
1934
|
generated_features.extend(converter.generated_features)
|
|
1965
1935
|
email_converted_to_hem = converter.email_converted_to_hem
|
|
@@ -1973,21 +1943,30 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1973
1943
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1974
1944
|
|
|
1975
1945
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
1976
|
-
|
|
1977
|
-
for col in original_features_for_transform:
|
|
1978
|
-
meaning_types[col] = FileColumnMeaningType.FEATURE
|
|
1979
|
-
features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
|
|
1946
|
+
non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1980
1947
|
|
|
1981
1948
|
if email_converted_to_hem:
|
|
1982
|
-
|
|
1949
|
+
non_keys_columns.append(email_column)
|
|
1950
|
+
|
|
1951
|
+
# Don't pass features in backend on transform
|
|
1952
|
+
original_features_for_transform = None
|
|
1953
|
+
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1954
|
+
if len(non_keys_columns) > 0:
|
|
1955
|
+
# Pass only features that need for transform
|
|
1956
|
+
features_for_transform = self._search_task.get_features_for_transform()
|
|
1957
|
+
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1958
|
+
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1959
|
+
original_features_for_transform = [
|
|
1960
|
+
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1961
|
+
]
|
|
1962
|
+
non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
|
|
1983
1963
|
|
|
1984
|
-
|
|
1985
|
-
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1964
|
+
runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
|
|
1986
1965
|
|
|
1987
1966
|
if add_fit_system_record_id:
|
|
1988
|
-
df = self.__add_fit_system_record_id(df,
|
|
1967
|
+
df = self.__add_fit_system_record_id(df, dict(), search_keys)
|
|
1989
1968
|
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
|
1990
|
-
|
|
1969
|
+
non_keys_columns.append(SORT_ID)
|
|
1991
1970
|
|
|
1992
1971
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
|
|
1993
1972
|
|
|
@@ -1995,19 +1974,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1995
1974
|
"Float64"
|
|
1996
1975
|
)
|
|
1997
1976
|
meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
1998
|
-
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
1999
|
-
if SEARCH_KEY_UNNEST in df.columns:
|
|
2000
|
-
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
2001
1977
|
|
|
2002
1978
|
df = df.reset_index(drop=True)
|
|
2003
|
-
system_columns_with_original_index = [SYSTEM_RECORD_ID
|
|
1979
|
+
system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
|
|
2004
1980
|
if add_fit_system_record_id:
|
|
2005
1981
|
system_columns_with_original_index.append(SORT_ID)
|
|
2006
1982
|
df_with_original_index = df[system_columns_with_original_index].copy()
|
|
2007
1983
|
|
|
2008
1984
|
combined_search_keys = combine_search_keys(search_keys.keys())
|
|
2009
1985
|
|
|
2010
|
-
df_without_features = df.drop(columns=
|
|
1986
|
+
df_without_features = df.drop(columns=non_keys_columns)
|
|
2011
1987
|
|
|
2012
1988
|
df_without_features = clean_full_duplicates(
|
|
2013
1989
|
df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
|
|
@@ -2019,13 +1995,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2019
1995
|
dataset = Dataset(
|
|
2020
1996
|
"sample_" + str(uuid.uuid4()),
|
|
2021
1997
|
df=df_without_features,
|
|
2022
|
-
meaning_types=meaning_types,
|
|
2023
|
-
search_keys=combined_search_keys,
|
|
2024
|
-
unnest_search_keys=unnest_search_keys,
|
|
2025
1998
|
date_format=self.date_format,
|
|
2026
1999
|
rest_client=self.rest_client,
|
|
2027
2000
|
logger=self.logger,
|
|
2028
2001
|
)
|
|
2002
|
+
dataset.meaning_types = meaning_types
|
|
2003
|
+
dataset.search_keys = combined_search_keys
|
|
2029
2004
|
if email_converted_to_hem:
|
|
2030
2005
|
dataset.ignore_columns = [email_column]
|
|
2031
2006
|
|
|
@@ -2164,14 +2139,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2164
2139
|
|
|
2165
2140
|
key_types = search_keys.values()
|
|
2166
2141
|
|
|
2167
|
-
# Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
|
|
2168
|
-
multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
|
|
2169
|
-
for multi_key in multi_keys:
|
|
2170
|
-
if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
|
|
2171
|
-
msg = self.bundle.get("unsupported_multi_key").format(multi_key)
|
|
2172
|
-
self.logger.warning(msg)
|
|
2173
|
-
raise ValidationError(msg)
|
|
2174
|
-
|
|
2175
2142
|
if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
|
|
2176
2143
|
msg = self.bundle.get("date_and_datetime_simultanious")
|
|
2177
2144
|
self.logger.warning(msg)
|
|
@@ -2187,11 +2154,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2187
2154
|
self.logger.warning(msg)
|
|
2188
2155
|
raise ValidationError(msg)
|
|
2189
2156
|
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2157
|
+
for key_type in SearchKey.__members__.values():
|
|
2158
|
+
if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
|
|
2159
|
+
msg = self.bundle.get("multiple_search_key").format(key_type)
|
|
2160
|
+
self.logger.warning(msg)
|
|
2161
|
+
raise ValidationError(msg)
|
|
2195
2162
|
|
|
2196
2163
|
# non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
|
|
2197
2164
|
# if (
|
|
@@ -2329,6 +2296,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2329
2296
|
self.logger.info("Input dataset hasn't date column")
|
|
2330
2297
|
if self.add_date_if_missing:
|
|
2331
2298
|
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2299
|
+
email_column = self._get_email_column(self.fit_search_keys)
|
|
2300
|
+
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2301
|
+
email_converted_to_hem = False
|
|
2302
|
+
if email_column:
|
|
2303
|
+
converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
|
|
2304
|
+
df = converter.convert(df)
|
|
2305
|
+
self.fit_generated_features.extend(converter.generated_features)
|
|
2306
|
+
email_converted_to_hem = converter.email_converted_to_hem
|
|
2332
2307
|
if (
|
|
2333
2308
|
self.detect_missing_search_keys
|
|
2334
2309
|
and list(self.fit_search_keys.values()) == [SearchKey.DATE]
|
|
@@ -2337,37 +2312,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2337
2312
|
converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
|
|
2338
2313
|
df = converter.convert(df)
|
|
2339
2314
|
|
|
2340
|
-
# Explode multiple search keys
|
|
2341
2315
|
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
|
|
2342
|
-
meaning_types = {
|
|
2343
|
-
**{col: key.value for col, key in self.fit_search_keys.items()},
|
|
2344
|
-
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2345
|
-
}
|
|
2346
|
-
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2347
|
-
if eval_set is not None and len(eval_set) > 0:
|
|
2348
|
-
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2349
|
-
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
|
|
2350
|
-
|
|
2351
|
-
# TODO check that this is correct for enrichment
|
|
2352
|
-
self.df_with_original_index = df.copy()
|
|
2353
|
-
|
|
2354
|
-
df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
|
|
2355
|
-
|
|
2356
|
-
# Convert EMAIL to HEM after unnesting to do it only with one column
|
|
2357
|
-
email_column = self._get_email_column(self.fit_search_keys)
|
|
2358
|
-
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2359
|
-
email_converted_to_hem = False
|
|
2360
|
-
if email_column:
|
|
2361
|
-
converter = EmailSearchKeyConverter(
|
|
2362
|
-
email_column, hem_column, self.fit_search_keys, list(unnest_search_keys.keys()), self.logger
|
|
2363
|
-
)
|
|
2364
|
-
df = converter.convert(df)
|
|
2365
|
-
self.fit_generated_features.extend(converter.generated_features)
|
|
2366
|
-
email_converted_to_hem = converter.email_converted_to_hem
|
|
2367
|
-
|
|
2368
|
-
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
|
|
2369
|
-
self.fit_search_keys.keys()
|
|
2370
|
-
)
|
|
2371
2316
|
if email_converted_to_hem:
|
|
2372
2317
|
non_feature_columns.append(email_column)
|
|
2373
2318
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
@@ -2391,14 +2336,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2391
2336
|
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2392
2337
|
}
|
|
2393
2338
|
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2394
|
-
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
2395
|
-
if SEARCH_KEY_UNNEST in df.columns:
|
|
2396
|
-
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
2397
2339
|
if eval_set is not None and len(eval_set) > 0:
|
|
2398
2340
|
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2399
2341
|
|
|
2400
|
-
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys
|
|
2342
|
+
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
|
|
2401
2343
|
|
|
2344
|
+
self.df_with_original_index = df.copy()
|
|
2402
2345
|
df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
|
|
2403
2346
|
|
|
2404
2347
|
combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
|
|
@@ -2406,15 +2349,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2406
2349
|
dataset = Dataset(
|
|
2407
2350
|
"tds_" + str(uuid.uuid4()),
|
|
2408
2351
|
df=df,
|
|
2409
|
-
meaning_types=meaning_types,
|
|
2410
|
-
search_keys=combined_search_keys,
|
|
2411
|
-
unnest_search_keys=unnest_search_keys,
|
|
2412
2352
|
model_task_type=model_task_type,
|
|
2413
2353
|
date_format=self.date_format,
|
|
2414
2354
|
random_state=self.random_state,
|
|
2415
2355
|
rest_client=self.rest_client,
|
|
2416
2356
|
logger=self.logger,
|
|
2417
2357
|
)
|
|
2358
|
+
dataset.meaning_types = meaning_types
|
|
2359
|
+
dataset.search_keys = combined_search_keys
|
|
2418
2360
|
if email_converted_to_hem:
|
|
2419
2361
|
dataset.ignore_columns = [email_column]
|
|
2420
2362
|
|
|
@@ -2784,10 +2726,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2784
2726
|
X: pd.DataFrame, y: pd.Series, cv: Optional[CVType]
|
|
2785
2727
|
) -> Tuple[pd.DataFrame, pd.Series]:
|
|
2786
2728
|
if cv not in [CVType.time_series, CVType.blocked_time_series]:
|
|
2787
|
-
record_id_column = ENTITY_SYSTEM_RECORD_ID if ENTITY_SYSTEM_RECORD_ID in X else SYSTEM_RECORD_ID
|
|
2788
2729
|
Xy = X.copy()
|
|
2789
2730
|
Xy[TARGET] = y
|
|
2790
|
-
Xy = Xy.sort_values(by=
|
|
2731
|
+
Xy = Xy.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
|
|
2791
2732
|
X = Xy.drop(columns=TARGET)
|
|
2792
2733
|
y = Xy[TARGET].copy()
|
|
2793
2734
|
|
|
@@ -2964,19 +2905,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2964
2905
|
|
|
2965
2906
|
@staticmethod
|
|
2966
2907
|
def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
|
|
2970
|
-
if len(cols) == 1:
|
|
2971
|
-
return cols[0]
|
|
2908
|
+
for col, t in search_keys.items():
|
|
2909
|
+
if t == SearchKey.EMAIL:
|
|
2910
|
+
return col
|
|
2972
2911
|
|
|
2973
2912
|
@staticmethod
|
|
2974
2913
|
def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
if len(cols) == 1:
|
|
2979
|
-
return cols[0]
|
|
2914
|
+
for col, t in search_keys.items():
|
|
2915
|
+
if t == SearchKey.HEM:
|
|
2916
|
+
return col
|
|
2980
2917
|
|
|
2981
2918
|
@staticmethod
|
|
2982
2919
|
def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
@@ -2984,44 +2921,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2984
2921
|
if t == SearchKey.PHONE:
|
|
2985
2922
|
return col
|
|
2986
2923
|
|
|
2987
|
-
def _explode_multiple_search_keys(
|
|
2988
|
-
self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]
|
|
2989
|
-
) -> Tuple[pd.DataFrame, Dict[str, List[str]]]:
|
|
2990
|
-
# find groups of multiple search keys
|
|
2991
|
-
search_key_names_by_type: Dict[SearchKey, str] = {}
|
|
2992
|
-
for key_name, key_type in search_keys.items():
|
|
2993
|
-
search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
|
|
2994
|
-
search_key_names_by_type = {
|
|
2995
|
-
key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
|
|
2996
|
-
}
|
|
2997
|
-
if len(search_key_names_by_type) == 0:
|
|
2998
|
-
return df, {}
|
|
2999
|
-
|
|
3000
|
-
multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
|
|
3001
|
-
other_columns = [col for col in df.columns if col not in multiple_keys_columns]
|
|
3002
|
-
exploded_dfs = []
|
|
3003
|
-
unnest_search_keys = {}
|
|
3004
|
-
|
|
3005
|
-
for key_type, key_names in search_key_names_by_type.items():
|
|
3006
|
-
new_search_key = f"upgini_{key_type.name.lower()}_unnest"
|
|
3007
|
-
exploded_df = pd.melt(
|
|
3008
|
-
df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
|
|
3009
|
-
)
|
|
3010
|
-
exploded_dfs.append(exploded_df)
|
|
3011
|
-
for old_key in key_names:
|
|
3012
|
-
del search_keys[old_key]
|
|
3013
|
-
search_keys[new_search_key] = key_type
|
|
3014
|
-
unnest_search_keys[new_search_key] = key_names
|
|
3015
|
-
|
|
3016
|
-
df = pd.concat(exploded_dfs, ignore_index=True)
|
|
3017
|
-
return df, unnest_search_keys
|
|
3018
|
-
|
|
3019
2924
|
def __add_fit_system_record_id(
|
|
3020
|
-
self,
|
|
3021
|
-
df: pd.DataFrame,
|
|
3022
|
-
meaning_types: Dict[str, FileColumnMeaningType],
|
|
3023
|
-
search_keys: Dict[str, SearchKey],
|
|
3024
|
-
id_name: str,
|
|
2925
|
+
self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
|
|
3025
2926
|
) -> pd.DataFrame:
|
|
3026
2927
|
# save original order or rows
|
|
3027
2928
|
original_index_name = df.index.name
|
|
@@ -3070,18 +2971,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3070
2971
|
|
|
3071
2972
|
df = df.reset_index(drop=True).reset_index()
|
|
3072
2973
|
# system_record_id saves correct order for fit
|
|
3073
|
-
df = df.rename(columns={DEFAULT_INDEX:
|
|
2974
|
+
df = df.rename(columns={DEFAULT_INDEX: SYSTEM_RECORD_ID})
|
|
3074
2975
|
|
|
3075
2976
|
# return original order
|
|
3076
2977
|
df = df.set_index(ORIGINAL_INDEX)
|
|
3077
2978
|
df.index.name = original_index_name
|
|
3078
2979
|
df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
|
|
3079
2980
|
|
|
3080
|
-
meaning_types[
|
|
3081
|
-
FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
3082
|
-
if id_name == SYSTEM_RECORD_ID
|
|
3083
|
-
else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
3084
|
-
)
|
|
2981
|
+
meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
3085
2982
|
return df
|
|
3086
2983
|
|
|
3087
2984
|
def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -3136,11 +3033,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3136
3033
|
)
|
|
3137
3034
|
|
|
3138
3035
|
comparing_columns = X.columns if is_transform else df_with_original_index.columns
|
|
3139
|
-
dup_features = [
|
|
3140
|
-
c
|
|
3141
|
-
for c in comparing_columns
|
|
3142
|
-
if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
3143
|
-
]
|
|
3036
|
+
dup_features = [c for c in comparing_columns if c in result_features.columns and c != SYSTEM_RECORD_ID]
|
|
3144
3037
|
if len(dup_features) > 0:
|
|
3145
3038
|
self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
|
|
3146
3039
|
raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
|
|
@@ -3151,7 +3044,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3151
3044
|
result_features = pd.merge(
|
|
3152
3045
|
df_with_original_index,
|
|
3153
3046
|
result_features,
|
|
3154
|
-
|
|
3047
|
+
left_on=SYSTEM_RECORD_ID,
|
|
3048
|
+
right_on=SYSTEM_RECORD_ID,
|
|
3155
3049
|
how="left" if is_transform else "inner",
|
|
3156
3050
|
)
|
|
3157
3051
|
result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
|
|
@@ -3162,7 +3056,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3162
3056
|
result_features = result_features[~result_features[SYSTEM_RECORD_ID].isin(rows_to_drop[SYSTEM_RECORD_ID])]
|
|
3163
3057
|
self.logger.info(f"After dropping target outliers size: {len(result_features)}")
|
|
3164
3058
|
|
|
3165
|
-
result_eval_sets =
|
|
3059
|
+
result_eval_sets = dict()
|
|
3166
3060
|
if not is_transform and EVAL_SET_INDEX in result_features.columns:
|
|
3167
3061
|
result_train_features = result_features.loc[result_features[EVAL_SET_INDEX] == 0].copy()
|
|
3168
3062
|
eval_set_indices = list(result_features[EVAL_SET_INDEX].unique())
|
|
@@ -3368,7 +3262,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3368
3262
|
if autofe_feature.op.is_vector:
|
|
3369
3263
|
continue
|
|
3370
3264
|
|
|
3371
|
-
description =
|
|
3265
|
+
description = dict()
|
|
3372
3266
|
|
|
3373
3267
|
feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True))
|
|
3374
3268
|
if feature_meta is None:
|
|
@@ -3534,13 +3428,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3534
3428
|
self.warning_counter.increment()
|
|
3535
3429
|
|
|
3536
3430
|
if len(valid_search_keys) == 1:
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3540
|
-
|
|
3541
|
-
|
|
3542
|
-
|
|
3543
|
-
|
|
3431
|
+
for k, v in valid_search_keys.items():
|
|
3432
|
+
# Show warning for country only if country is the only key
|
|
3433
|
+
if x[k].nunique() == 1 and (v != SearchKey.COUNTRY or len(valid_search_keys) == 1):
|
|
3434
|
+
msg = self.bundle.get("single_constant_search_key").format(v, x[k].values[0])
|
|
3435
|
+
print(msg)
|
|
3436
|
+
self.logger.warning(msg)
|
|
3437
|
+
self.warning_counter.increment()
|
|
3544
3438
|
|
|
3545
3439
|
self.logger.info(f"Prepared search keys: {valid_search_keys}")
|
|
3546
3440
|
|
|
@@ -3650,68 +3544,61 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3650
3544
|
def check_need_detect(search_key: SearchKey):
|
|
3651
3545
|
return not is_transform or search_key in self.fit_search_keys.values()
|
|
3652
3546
|
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3656
|
-
|
|
3657
|
-
|
|
3658
|
-
|
|
3659
|
-
self.autodetected_search_keys.update(new_keys)
|
|
3660
|
-
self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
|
|
3547
|
+
if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
|
3548
|
+
maybe_key = PostalCodeSearchKeyDetector().get_search_key_column(sample)
|
|
3549
|
+
if maybe_key is not None:
|
|
3550
|
+
search_keys[maybe_key] = SearchKey.POSTAL_CODE
|
|
3551
|
+
self.autodetected_search_keys[maybe_key] = SearchKey.POSTAL_CODE
|
|
3552
|
+
self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_key}")
|
|
3661
3553
|
if not silent_mode:
|
|
3662
|
-
print(self.bundle.get("postal_code_detected").format(
|
|
3554
|
+
print(self.bundle.get("postal_code_detected").format(maybe_key))
|
|
3663
3555
|
|
|
3664
3556
|
if (
|
|
3665
3557
|
SearchKey.COUNTRY not in search_keys.values()
|
|
3666
3558
|
and self.country_code is None
|
|
3667
3559
|
and check_need_detect(SearchKey.COUNTRY)
|
|
3668
3560
|
):
|
|
3669
|
-
maybe_key = CountrySearchKeyDetector().
|
|
3670
|
-
if maybe_key:
|
|
3671
|
-
search_keys[maybe_key
|
|
3672
|
-
self.autodetected_search_keys[maybe_key
|
|
3561
|
+
maybe_key = CountrySearchKeyDetector().get_search_key_column(sample)
|
|
3562
|
+
if maybe_key is not None:
|
|
3563
|
+
search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3564
|
+
self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3673
3565
|
self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
|
|
3674
3566
|
if not silent_mode:
|
|
3675
3567
|
print(self.bundle.get("country_detected").format(maybe_key))
|
|
3676
3568
|
|
|
3677
3569
|
if (
|
|
3678
|
-
|
|
3679
|
-
SearchKey.HEM not in search_keys.values()
|
|
3570
|
+
SearchKey.EMAIL not in search_keys.values()
|
|
3571
|
+
and SearchKey.HEM not in search_keys.values()
|
|
3680
3572
|
and check_need_detect(SearchKey.HEM)
|
|
3681
3573
|
):
|
|
3682
|
-
|
|
3683
|
-
if
|
|
3574
|
+
maybe_key = EmailSearchKeyDetector().get_search_key_column(sample)
|
|
3575
|
+
if maybe_key is not None and maybe_key not in search_keys.keys():
|
|
3684
3576
|
if self.__is_registered or is_demo_dataset:
|
|
3685
|
-
|
|
3686
|
-
|
|
3687
|
-
self.
|
|
3688
|
-
self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
|
|
3577
|
+
search_keys[maybe_key] = SearchKey.EMAIL
|
|
3578
|
+
self.autodetected_search_keys[maybe_key] = SearchKey.EMAIL
|
|
3579
|
+
self.logger.info(f"Autodetected search key EMAIL in column {maybe_key}")
|
|
3689
3580
|
if not silent_mode:
|
|
3690
|
-
print(self.bundle.get("email_detected").format(
|
|
3581
|
+
print(self.bundle.get("email_detected").format(maybe_key))
|
|
3691
3582
|
else:
|
|
3692
3583
|
self.logger.warning(
|
|
3693
|
-
f"Autodetected search key EMAIL in column {
|
|
3694
|
-
" But not used because not registered user"
|
|
3584
|
+
f"Autodetected search key EMAIL in column {maybe_key}. But not used because not registered user"
|
|
3695
3585
|
)
|
|
3696
3586
|
if not silent_mode:
|
|
3697
|
-
print(self.bundle.get("email_detected_not_registered").format(
|
|
3587
|
+
print(self.bundle.get("email_detected_not_registered").format(maybe_key))
|
|
3698
3588
|
self.warning_counter.increment()
|
|
3699
3589
|
|
|
3700
|
-
|
|
3701
|
-
|
|
3702
|
-
|
|
3703
|
-
if maybe_keys:
|
|
3590
|
+
if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
|
|
3591
|
+
maybe_key = PhoneSearchKeyDetector().get_search_key_column(sample)
|
|
3592
|
+
if maybe_key is not None and maybe_key not in search_keys.keys():
|
|
3704
3593
|
if self.__is_registered or is_demo_dataset:
|
|
3705
|
-
|
|
3706
|
-
|
|
3707
|
-
self.
|
|
3708
|
-
self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
|
|
3594
|
+
search_keys[maybe_key] = SearchKey.PHONE
|
|
3595
|
+
self.autodetected_search_keys[maybe_key] = SearchKey.PHONE
|
|
3596
|
+
self.logger.info(f"Autodetected search key PHONE in column {maybe_key}")
|
|
3709
3597
|
if not silent_mode:
|
|
3710
|
-
print(self.bundle.get("phone_detected").format(
|
|
3598
|
+
print(self.bundle.get("phone_detected").format(maybe_key))
|
|
3711
3599
|
else:
|
|
3712
3600
|
self.logger.warning(
|
|
3713
|
-
f"Autodetected search key PHONE in column {
|
|
3714
|
-
"But not used because not registered user"
|
|
3601
|
+
f"Autodetected search key PHONE in column {maybe_key}. But not used because not registered user"
|
|
3715
3602
|
)
|
|
3716
3603
|
if not silent_mode:
|
|
3717
3604
|
print(self.bundle.get("phone_detected_not_registered"))
|
upgini/fingerprint.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
|
|
3
|
+
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
|
4
|
+
*
|
|
5
|
+
* This software contains code from open-source projects:
|
|
6
|
+
* MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
|
|
7
|
+
*/
|
|
8
|
+
var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="leaderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocmVmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
|
upgini/metadata.py
CHANGED
|
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Set
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
SYSTEM_RECORD_ID = "system_record_id"
|
|
7
|
-
ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
|
|
8
|
-
SEARCH_KEY_UNNEST = "search_key_unnest"
|
|
9
7
|
SORT_ID = "sort_id"
|
|
10
8
|
EVAL_SET_INDEX = "eval_set_index"
|
|
11
9
|
TARGET = "target"
|
|
@@ -13,7 +11,7 @@ COUNTRY = "country_iso_code"
|
|
|
13
11
|
RENAMED_INDEX = "index_col"
|
|
14
12
|
DEFAULT_INDEX = "index"
|
|
15
13
|
ORIGINAL_INDEX = "original_index"
|
|
16
|
-
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID,
|
|
14
|
+
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
|
|
17
15
|
|
|
18
16
|
|
|
19
17
|
class FileColumnMeaningType(Enum):
|
|
@@ -39,8 +37,6 @@ class FileColumnMeaningType(Enum):
|
|
|
39
37
|
POSTAL_CODE = "POSTAL_CODE"
|
|
40
38
|
SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
|
|
41
39
|
EVAL_SET_INDEX = "EVAL_SET_INDEX"
|
|
42
|
-
ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
|
|
43
|
-
UNNEST_KEY = "UNNEST_KEY"
|
|
44
40
|
|
|
45
41
|
|
|
46
42
|
class SearchKey(Enum):
|
|
@@ -186,10 +182,6 @@ class FileColumnMetadata(BaseModel):
|
|
|
186
182
|
meaningType: FileColumnMeaningType
|
|
187
183
|
minMaxValues: Optional[NumericInterval] = None
|
|
188
184
|
originalName: Optional[str]
|
|
189
|
-
# is this column contains keys from multiple key columns like msisdn1, msisdn2
|
|
190
|
-
isUnnest: bool = False
|
|
191
|
-
# list of original etalon key column names like msisdn1, msisdn2
|
|
192
|
-
unnestKeyNames: Optional[list[str]]
|
|
193
185
|
|
|
194
186
|
|
|
195
187
|
class FileMetadata(BaseModel):
|
|
@@ -284,7 +276,7 @@ class FeaturesFilter(BaseModel):
|
|
|
284
276
|
|
|
285
277
|
|
|
286
278
|
class RuntimeParameters(BaseModel):
|
|
287
|
-
properties: Dict[str, str] =
|
|
279
|
+
properties: Dict[str, str] = dict()
|
|
288
280
|
|
|
289
281
|
|
|
290
282
|
class SearchCustomization(BaseModel):
|
upgini/metrics.py
CHANGED
|
@@ -357,7 +357,7 @@ class EstimatorWrapper:
|
|
|
357
357
|
"logger": logger,
|
|
358
358
|
}
|
|
359
359
|
if estimator is None:
|
|
360
|
-
params =
|
|
360
|
+
params = dict()
|
|
361
361
|
# if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
|
|
362
362
|
# params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
|
|
363
363
|
if target_type == ModelTaskType.MULTICLASS:
|
|
@@ -88,7 +88,6 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
|
88
88
|
search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
|
|
89
89
|
empty_search_key=Search key {} is empty. Please fill values or remove this search key
|
|
90
90
|
single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
|
|
91
|
-
unsupported_multi_key=Search key {} cannot be used multiple times
|
|
92
91
|
unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
|
|
93
92
|
date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
|
|
94
93
|
invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List, Optional
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
@@ -10,18 +10,16 @@ class BaseSearchKeyDetector:
|
|
|
10
10
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
11
|
raise NotImplementedError()
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
column_name
|
|
16
|
-
|
|
17
|
-
if self._is_search_key_by_name(column_name)
|
|
18
|
-
]
|
|
13
|
+
def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
|
|
14
|
+
for column_name in column_names:
|
|
15
|
+
if self._is_search_key_by_name(column_name):
|
|
16
|
+
return column_name
|
|
19
17
|
|
|
20
|
-
def
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
18
|
+
def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
|
|
19
|
+
maybe_column = self._get_search_key_by_name(df.columns.to_list())
|
|
20
|
+
if maybe_column is not None:
|
|
21
|
+
return maybe_column
|
|
22
|
+
|
|
23
|
+
for column_name in df.columns:
|
|
25
24
|
if self._is_search_key_by_values(df[column_name]):
|
|
26
|
-
|
|
27
|
-
return list(set(columns_by_names + columns_by_values))
|
|
25
|
+
return column_name
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -208,14 +208,13 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
|
|
|
208
208
|
if nunique_dates / days_delta < 0.3:
|
|
209
209
|
return False
|
|
210
210
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def is_multiple_rows(group):
|
|
211
|
+
accumulated_changing_columns = set()
|
|
212
|
+
|
|
213
|
+
def check_differences(group: pd.DataFrame):
|
|
214
|
+
changing_columns = group.columns[group.nunique(dropna=False) > 1].to_list()
|
|
215
|
+
accumulated_changing_columns.update(changing_columns)
|
|
216
|
+
|
|
217
|
+
def is_multiple_rows(group: pd.DataFrame) -> bool:
|
|
219
218
|
return group.shape[0] > 1
|
|
220
219
|
|
|
221
220
|
grouped = df.groupby(date_col)
|
|
@@ -228,8 +227,8 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
|
|
|
228
227
|
if df.shape[1] <= 3:
|
|
229
228
|
return True
|
|
230
229
|
|
|
231
|
-
|
|
232
|
-
return
|
|
230
|
+
grouped.apply(check_differences, include_groups=False)
|
|
231
|
+
return len(accumulated_changing_columns) <= 2
|
|
233
232
|
|
|
234
233
|
|
|
235
234
|
def validate_dates_distribution(
|
|
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import
|
|
7
|
-
ENTITY_SYSTEM_RECORD_ID,
|
|
8
|
-
EVAL_SET_INDEX,
|
|
9
|
-
SORT_ID,
|
|
10
|
-
SYSTEM_RECORD_ID,
|
|
11
|
-
TARGET,
|
|
12
|
-
ModelTaskType,
|
|
13
|
-
SearchKey,
|
|
14
|
-
)
|
|
6
|
+
from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
15
7
|
from upgini.resource_bundle import ResourceBundle
|
|
16
8
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
17
9
|
from upgini.utils.target_utils import define_task
|
|
@@ -151,8 +143,6 @@ def clean_full_duplicates(
|
|
|
151
143
|
unique_columns = df.columns.tolist()
|
|
152
144
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
153
145
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
154
|
-
if ENTITY_SYSTEM_RECORD_ID in unique_columns:
|
|
155
|
-
unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
|
|
156
146
|
if SORT_ID in unique_columns:
|
|
157
147
|
unique_columns.remove(SORT_ID)
|
|
158
148
|
if EVAL_SET_INDEX in unique_columns:
|
upgini/utils/email_utils.py
CHANGED
|
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
|
|
|
38
38
|
email_column: str,
|
|
39
39
|
hem_column: Optional[str],
|
|
40
40
|
search_keys: Dict[str, SearchKey],
|
|
41
|
-
unnest_search_keys: Optional[List[str]] = None,
|
|
42
41
|
logger: Optional[logging.Logger] = None,
|
|
43
42
|
):
|
|
44
43
|
self.email_column = email_column
|
|
45
44
|
self.hem_column = hem_column
|
|
46
45
|
self.search_keys = search_keys
|
|
47
|
-
self.unnest_search_keys = unnest_search_keys
|
|
48
46
|
if logger is not None:
|
|
49
47
|
self.logger = logger
|
|
50
48
|
else:
|
|
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
|
|
|
82
80
|
del self.search_keys[self.email_column]
|
|
83
81
|
return df
|
|
84
82
|
self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
|
|
85
|
-
self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
|
|
86
83
|
self.email_converted_to_hem = True
|
|
87
84
|
|
|
88
85
|
del self.search_keys[self.email_column]
|
|
89
|
-
if self.email_column in self.unnest_search_keys:
|
|
90
|
-
self.unnest_search_keys.remove(self.email_column)
|
|
91
86
|
|
|
92
87
|
df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
|
|
93
88
|
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=ys7RQoZsyY8-NkUZyp12K8z5aQmg7pyx0LtwclFtXkc,176358
|
|
6
|
+
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
6
7
|
upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
|
|
7
|
-
upgini/metadata.py,sha256=
|
|
8
|
-
upgini/metrics.py,sha256=
|
|
8
|
+
upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
|
|
9
|
+
upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
|
|
9
10
|
upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
|
|
10
11
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
11
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
@@ -28,22 +29,22 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
28
29
|
upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
|
|
29
30
|
upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
|
|
30
31
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
31
|
-
upgini/resource_bundle/strings.properties,sha256
|
|
32
|
+
upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
|
|
32
33
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
33
34
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
35
|
upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
|
|
35
36
|
upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
|
|
36
37
|
upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
37
38
|
upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
|
|
38
|
-
upgini/utils/base_search_key_detector.py,sha256=
|
|
39
|
+
upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
|
|
39
40
|
upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
|
|
40
41
|
upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
|
|
41
42
|
upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
|
|
42
43
|
upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
|
|
43
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
44
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
44
|
+
upgini/utils/datetime_utils.py,sha256=La3jQSkc1cdFAm6KcSAOWKg6-n7rFzTlDjMONxm45YM,10411
|
|
45
|
+
upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
|
|
45
46
|
upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
|
|
46
|
-
upgini/utils/email_utils.py,sha256=
|
|
47
|
+
upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
|
|
47
48
|
upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
|
|
48
49
|
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
49
50
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
@@ -55,8 +56,8 @@ upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,4
|
|
|
55
56
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
56
57
|
upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
|
|
57
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
58
|
-
upgini-1.1.
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
59
|
+
upgini-1.1.279a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
60
|
+
upgini-1.1.279a1.dist-info/METADATA,sha256=tcdQ86ByFS4oZKHAS_DPGVUATTQo0JKDYB6Lw7E_oR4,48158
|
|
61
|
+
upgini-1.1.279a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
62
|
+
upgini-1.1.279a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
63
|
+
upgini-1.1.279a1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|