upgini 1.1.277__py3-none-any.whl → 1.1.278a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/dataset.py +11 -2
- upgini/features_enricher.py +213 -100
- upgini/metadata.py +10 -2
- upgini/metrics.py +1 -1
- upgini/resource_bundle/strings.properties +1 -0
- upgini/utils/base_search_key_detector.py +14 -12
- upgini/utils/datetime_utils.py +2 -2
- upgini/utils/deduplicate_utils.py +11 -1
- upgini/utils/email_utils.py +5 -0
- {upgini-1.1.277.dist-info → upgini-1.1.278a2.dist-info}/METADATA +1 -1
- {upgini-1.1.277.dist-info → upgini-1.1.278a2.dist-info}/RECORD +14 -15
- upgini/fingerprint.js +0 -8
- {upgini-1.1.277.dist-info → upgini-1.1.278a2.dist-info}/LICENSE +0 -0
- {upgini-1.1.277.dist-info → upgini-1.1.278a2.dist-info}/WHEEL +0 -0
- {upgini-1.1.277.dist-info → upgini-1.1.278a2.dist-info}/top_level.txt +0 -0
upgini/dataset.py
CHANGED
|
@@ -23,7 +23,9 @@ from pandas.api.types import (
|
|
|
23
23
|
from upgini.errors import ValidationError
|
|
24
24
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
25
25
|
from upgini.metadata import (
|
|
26
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
26
27
|
EVAL_SET_INDEX,
|
|
28
|
+
SEARCH_KEY_UNNEST,
|
|
27
29
|
SYSTEM_COLUMNS,
|
|
28
30
|
SYSTEM_RECORD_ID,
|
|
29
31
|
TARGET,
|
|
@@ -79,6 +81,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
79
81
|
path: Optional[str] = None,
|
|
80
82
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
81
83
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
84
|
+
unnest_search_keys: Optional[Dict[str, str]] = None,
|
|
82
85
|
model_task_type: Optional[ModelTaskType] = None,
|
|
83
86
|
random_state: Optional[int] = None,
|
|
84
87
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -113,6 +116,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
113
116
|
self.description = description
|
|
114
117
|
self.meaning_types = meaning_types
|
|
115
118
|
self.search_keys = search_keys
|
|
119
|
+
self.unnest_search_keys = unnest_search_keys
|
|
116
120
|
self.ignore_columns = []
|
|
117
121
|
self.hierarchical_group_keys = []
|
|
118
122
|
self.hierarchical_subgroup_keys = []
|
|
@@ -172,7 +176,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
172
176
|
new_columns = []
|
|
173
177
|
dup_counter = 0
|
|
174
178
|
for column in self.data.columns:
|
|
175
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
179
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
|
|
176
180
|
self.columns_renaming[column] = column
|
|
177
181
|
new_columns.append(column)
|
|
178
182
|
continue
|
|
@@ -353,7 +357,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
353
357
|
|
|
354
358
|
if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
|
|
355
359
|
try:
|
|
356
|
-
self.data[postal_code] =
|
|
360
|
+
self.data[postal_code] = (
|
|
361
|
+
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
362
|
+
)
|
|
357
363
|
except Exception:
|
|
358
364
|
pass
|
|
359
365
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -803,6 +809,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
803
809
|
meaningType=meaning_type,
|
|
804
810
|
minMaxValues=min_max_values,
|
|
805
811
|
)
|
|
812
|
+
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
813
|
+
column_meta.isUnnest = True
|
|
814
|
+
column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
|
|
806
815
|
|
|
807
816
|
columns.append(column_meta)
|
|
808
817
|
|
upgini/features_enricher.py
CHANGED
|
@@ -11,6 +11,7 @@ import sys
|
|
|
11
11
|
import tempfile
|
|
12
12
|
import time
|
|
13
13
|
import uuid
|
|
14
|
+
from collections import Counter
|
|
14
15
|
from dataclasses import dataclass
|
|
15
16
|
from threading import Thread
|
|
16
17
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
|
@@ -45,9 +46,11 @@ from upgini.mdc import MDC
|
|
|
45
46
|
from upgini.metadata import (
|
|
46
47
|
COUNTRY,
|
|
47
48
|
DEFAULT_INDEX,
|
|
49
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
48
50
|
EVAL_SET_INDEX,
|
|
49
51
|
ORIGINAL_INDEX,
|
|
50
52
|
RENAMED_INDEX,
|
|
53
|
+
SEARCH_KEY_UNNEST,
|
|
51
54
|
SORT_ID,
|
|
52
55
|
SYSTEM_RECORD_ID,
|
|
53
56
|
TARGET,
|
|
@@ -248,7 +251,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
248
251
|
self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
|
|
249
252
|
|
|
250
253
|
validate_version(self.logger)
|
|
251
|
-
self.search_keys = search_keys or
|
|
254
|
+
self.search_keys = search_keys or {}
|
|
252
255
|
self.country_code = country_code
|
|
253
256
|
self.__validate_search_keys(search_keys, search_id)
|
|
254
257
|
self.model_task_type = model_task_type
|
|
@@ -1188,7 +1191,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1188
1191
|
email_column = self._get_email_column(search_keys)
|
|
1189
1192
|
hem_column = self._get_hem_column(search_keys)
|
|
1190
1193
|
if email_column:
|
|
1191
|
-
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
1194
|
+
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
|
|
1192
1195
|
extended_X = converter.convert(extended_X)
|
|
1193
1196
|
generated_features.extend(converter.generated_features)
|
|
1194
1197
|
if (
|
|
@@ -1340,7 +1343,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1340
1343
|
not in (
|
|
1341
1344
|
excluding_search_keys
|
|
1342
1345
|
+ list(self.fit_dropped_features)
|
|
1343
|
-
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID]
|
|
1346
|
+
+ [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
1344
1347
|
)
|
|
1345
1348
|
]
|
|
1346
1349
|
|
|
@@ -1404,7 +1407,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1404
1407
|
fitting_enriched_X[col].astype("string").str.replace(",", ".").astype(np.float64)
|
|
1405
1408
|
)
|
|
1406
1409
|
|
|
1407
|
-
fitting_eval_set_dict =
|
|
1410
|
+
fitting_eval_set_dict = {}
|
|
1408
1411
|
for idx, eval_tuple in eval_set_sampled_dict.items():
|
|
1409
1412
|
eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
|
|
1410
1413
|
eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
|
|
@@ -1516,7 +1519,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1516
1519
|
def __sample_only_input(
|
|
1517
1520
|
self, validated_X: pd.DataFrame, validated_y: pd.Series, eval_set: Optional[List[tuple]], is_demo_dataset: bool
|
|
1518
1521
|
) -> _SampledDataForMetrics:
|
|
1519
|
-
eval_set_sampled_dict =
|
|
1522
|
+
eval_set_sampled_dict = {}
|
|
1520
1523
|
|
|
1521
1524
|
df = validated_X.copy()
|
|
1522
1525
|
df[TARGET] = validated_y
|
|
@@ -1542,7 +1545,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1542
1545
|
df = df.sample(n=sample_rows, random_state=self.random_state)
|
|
1543
1546
|
|
|
1544
1547
|
df_extended, search_keys = self._extend_x(df, is_demo_dataset)
|
|
1545
|
-
df_extended = self.__add_fit_system_record_id(df_extended,
|
|
1548
|
+
df_extended = self.__add_fit_system_record_id(df_extended, {}, search_keys)
|
|
1546
1549
|
|
|
1547
1550
|
train_df = df_extended.query(f"{EVAL_SET_INDEX} == 0") if eval_set is not None else df_extended
|
|
1548
1551
|
X_sampled = train_df.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
|
|
@@ -1566,7 +1569,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1566
1569
|
trace_id: str,
|
|
1567
1570
|
remove_outliers_calc_metrics: Optional[bool],
|
|
1568
1571
|
) -> _SampledDataForMetrics:
|
|
1569
|
-
eval_set_sampled_dict =
|
|
1572
|
+
eval_set_sampled_dict = {}
|
|
1570
1573
|
search_keys = self.fit_search_keys
|
|
1571
1574
|
|
|
1572
1575
|
rows_to_drop = None
|
|
@@ -1640,7 +1643,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1640
1643
|
progress_bar: Optional[ProgressBar],
|
|
1641
1644
|
progress_callback: Optional[Callable[[SearchProgress], Any]],
|
|
1642
1645
|
) -> _SampledDataForMetrics:
|
|
1643
|
-
eval_set_sampled_dict =
|
|
1646
|
+
eval_set_sampled_dict = {}
|
|
1644
1647
|
if eval_set is not None:
|
|
1645
1648
|
self.logger.info("Transform with eval_set")
|
|
1646
1649
|
# concatenate X and eval_set with eval_set_index
|
|
@@ -1662,7 +1665,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1662
1665
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS}")
|
|
1663
1666
|
df = df.sample(n=Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS, random_state=self.random_state)
|
|
1664
1667
|
|
|
1665
|
-
eval_set_sampled_dict =
|
|
1668
|
+
eval_set_sampled_dict = {}
|
|
1666
1669
|
|
|
1667
1670
|
tmp_target_name = "__target"
|
|
1668
1671
|
df = df.rename(columns={TARGET: tmp_target_name})
|
|
@@ -1925,11 +1928,38 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1925
1928
|
self.logger.info("Input dataset hasn't date column")
|
|
1926
1929
|
if self.add_date_if_missing:
|
|
1927
1930
|
df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
|
|
1931
|
+
|
|
1932
|
+
# Don't pass all features in backend on transform
|
|
1933
|
+
original_features_for_transform = []
|
|
1934
|
+
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1935
|
+
features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
|
|
1936
|
+
if len(features_not_to_pass) > 0:
|
|
1937
|
+
# Pass only features that need for transform
|
|
1938
|
+
features_for_transform = self._search_task.get_features_for_transform()
|
|
1939
|
+
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1940
|
+
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1941
|
+
original_features_for_transform = [
|
|
1942
|
+
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1943
|
+
]
|
|
1944
|
+
|
|
1945
|
+
runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
|
|
1946
|
+
|
|
1947
|
+
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1948
|
+
|
|
1949
|
+
df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
|
|
1950
|
+
df[columns_for_system_record_id], index=False
|
|
1951
|
+
).astype("Float64")
|
|
1952
|
+
|
|
1953
|
+
# Explode multiple search keys
|
|
1954
|
+
df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
|
|
1955
|
+
|
|
1928
1956
|
email_column = self._get_email_column(search_keys)
|
|
1929
1957
|
hem_column = self._get_hem_column(search_keys)
|
|
1930
1958
|
email_converted_to_hem = False
|
|
1931
1959
|
if email_column:
|
|
1932
|
-
converter = EmailSearchKeyConverter(
|
|
1960
|
+
converter = EmailSearchKeyConverter(
|
|
1961
|
+
email_column, hem_column, search_keys, list(unnest_search_keys.keys()), self.logger
|
|
1962
|
+
)
|
|
1933
1963
|
df = converter.convert(df)
|
|
1934
1964
|
generated_features.extend(converter.generated_features)
|
|
1935
1965
|
email_converted_to_hem = converter.email_converted_to_hem
|
|
@@ -1943,30 +1973,21 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1943
1973
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1944
1974
|
|
|
1945
1975
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
1946
|
-
non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1976
|
+
# non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1977
|
+
for col in original_features_for_transform:
|
|
1978
|
+
meaning_types[col] = FileColumnMeaningType.FEATURE
|
|
1979
|
+
features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
|
|
1947
1980
|
|
|
1948
1981
|
if email_converted_to_hem:
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
# Don't pass features in backend on transform
|
|
1952
|
-
original_features_for_transform = None
|
|
1953
|
-
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1954
|
-
if len(non_keys_columns) > 0:
|
|
1955
|
-
# Pass only features that need for transform
|
|
1956
|
-
features_for_transform = self._search_task.get_features_for_transform()
|
|
1957
|
-
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1958
|
-
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1959
|
-
original_features_for_transform = [
|
|
1960
|
-
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1961
|
-
]
|
|
1962
|
-
non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
|
|
1982
|
+
features_not_to_pass.append(email_column)
|
|
1963
1983
|
|
|
1964
|
-
|
|
1984
|
+
features_not_to_pass = [c for c in features_not_to_pass if c not in original_features_for_transform]
|
|
1985
|
+
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1965
1986
|
|
|
1966
1987
|
if add_fit_system_record_id:
|
|
1967
|
-
df = self.__add_fit_system_record_id(df,
|
|
1988
|
+
df = self.__add_fit_system_record_id(df, {}, search_keys)
|
|
1968
1989
|
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
|
1969
|
-
|
|
1990
|
+
features_not_to_pass.append(SORT_ID)
|
|
1970
1991
|
|
|
1971
1992
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
|
|
1972
1993
|
|
|
@@ -1974,16 +1995,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1974
1995
|
"Float64"
|
|
1975
1996
|
)
|
|
1976
1997
|
meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
1998
|
+
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
1999
|
+
if SEARCH_KEY_UNNEST in df.columns:
|
|
2000
|
+
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
1977
2001
|
|
|
1978
2002
|
df = df.reset_index(drop=True)
|
|
1979
|
-
system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
|
|
2003
|
+
system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
|
|
1980
2004
|
if add_fit_system_record_id:
|
|
1981
2005
|
system_columns_with_original_index.append(SORT_ID)
|
|
1982
2006
|
df_with_original_index = df[system_columns_with_original_index].copy()
|
|
1983
2007
|
|
|
1984
2008
|
combined_search_keys = combine_search_keys(search_keys.keys())
|
|
1985
2009
|
|
|
1986
|
-
df_without_features = df.drop(columns=
|
|
2010
|
+
df_without_features = df.drop(columns=features_not_to_pass)
|
|
1987
2011
|
|
|
1988
2012
|
df_without_features = clean_full_duplicates(
|
|
1989
2013
|
df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
|
|
@@ -1995,12 +2019,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1995
2019
|
dataset = Dataset(
|
|
1996
2020
|
"sample_" + str(uuid.uuid4()),
|
|
1997
2021
|
df=df_without_features,
|
|
2022
|
+
meaning_types=meaning_types,
|
|
2023
|
+
search_keys=combined_search_keys,
|
|
2024
|
+
unnest_search_keys=unnest_search_keys,
|
|
1998
2025
|
date_format=self.date_format,
|
|
1999
2026
|
rest_client=self.rest_client,
|
|
2000
2027
|
logger=self.logger,
|
|
2001
2028
|
)
|
|
2002
|
-
dataset.meaning_types = meaning_types
|
|
2003
|
-
dataset.search_keys = combined_search_keys
|
|
2004
2029
|
if email_converted_to_hem:
|
|
2005
2030
|
dataset.ignore_columns = [email_column]
|
|
2006
2031
|
|
|
@@ -2139,6 +2164,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2139
2164
|
|
|
2140
2165
|
key_types = search_keys.values()
|
|
2141
2166
|
|
|
2167
|
+
# Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
|
|
2168
|
+
multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
|
|
2169
|
+
for multi_key in multi_keys:
|
|
2170
|
+
if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
|
|
2171
|
+
msg = self.bundle.get("unsupported_multi_key").format(multi_key)
|
|
2172
|
+
self.logger.warning(msg)
|
|
2173
|
+
raise ValidationError(msg)
|
|
2174
|
+
|
|
2142
2175
|
if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
|
|
2143
2176
|
msg = self.bundle.get("date_and_datetime_simultanious")
|
|
2144
2177
|
self.logger.warning(msg)
|
|
@@ -2154,11 +2187,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2154
2187
|
self.logger.warning(msg)
|
|
2155
2188
|
raise ValidationError(msg)
|
|
2156
2189
|
|
|
2157
|
-
for key_type in SearchKey.__members__.values():
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2190
|
+
# for key_type in SearchKey.__members__.values():
|
|
2191
|
+
# if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
|
|
2192
|
+
# msg = self.bundle.get("multiple_search_key").format(key_type)
|
|
2193
|
+
# self.logger.warning(msg)
|
|
2194
|
+
# raise ValidationError(msg)
|
|
2162
2195
|
|
|
2163
2196
|
# non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
|
|
2164
2197
|
# if (
|
|
@@ -2296,14 +2329,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2296
2329
|
self.logger.info("Input dataset hasn't date column")
|
|
2297
2330
|
if self.add_date_if_missing:
|
|
2298
2331
|
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2299
|
-
email_column = self._get_email_column(self.fit_search_keys)
|
|
2300
|
-
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2301
|
-
email_converted_to_hem = False
|
|
2302
|
-
if email_column:
|
|
2303
|
-
converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
|
|
2304
|
-
df = converter.convert(df)
|
|
2305
|
-
self.fit_generated_features.extend(converter.generated_features)
|
|
2306
|
-
email_converted_to_hem = converter.email_converted_to_hem
|
|
2307
2332
|
if (
|
|
2308
2333
|
self.detect_missing_search_keys
|
|
2309
2334
|
and list(self.fit_search_keys.values()) == [SearchKey.DATE]
|
|
@@ -2312,7 +2337,37 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2312
2337
|
converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
|
|
2313
2338
|
df = converter.convert(df)
|
|
2314
2339
|
|
|
2340
|
+
# Explode multiple search keys
|
|
2315
2341
|
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
|
|
2342
|
+
meaning_types = {
|
|
2343
|
+
**{col: key.value for col, key in self.fit_search_keys.items()},
|
|
2344
|
+
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2345
|
+
}
|
|
2346
|
+
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2347
|
+
if eval_set is not None and len(eval_set) > 0:
|
|
2348
|
+
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2349
|
+
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
|
|
2350
|
+
|
|
2351
|
+
# TODO check that this is correct for enrichment
|
|
2352
|
+
self.df_with_original_index = df.copy()
|
|
2353
|
+
|
|
2354
|
+
df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
|
|
2355
|
+
|
|
2356
|
+
# Convert EMAIL to HEM after unnesting to do it only with one column
|
|
2357
|
+
email_column = self._get_email_column(self.fit_search_keys)
|
|
2358
|
+
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2359
|
+
email_converted_to_hem = False
|
|
2360
|
+
if email_column:
|
|
2361
|
+
converter = EmailSearchKeyConverter(
|
|
2362
|
+
email_column, hem_column, self.fit_search_keys, list(unnest_search_keys.keys()), self.logger
|
|
2363
|
+
)
|
|
2364
|
+
df = converter.convert(df)
|
|
2365
|
+
self.fit_generated_features.extend(converter.generated_features)
|
|
2366
|
+
email_converted_to_hem = converter.email_converted_to_hem
|
|
2367
|
+
|
|
2368
|
+
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
|
|
2369
|
+
self.fit_search_keys.keys()
|
|
2370
|
+
)
|
|
2316
2371
|
if email_converted_to_hem:
|
|
2317
2372
|
non_feature_columns.append(email_column)
|
|
2318
2373
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
@@ -2336,12 +2391,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2336
2391
|
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2337
2392
|
}
|
|
2338
2393
|
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2394
|
+
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
2395
|
+
if SEARCH_KEY_UNNEST in df.columns:
|
|
2396
|
+
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
2339
2397
|
if eval_set is not None and len(eval_set) > 0:
|
|
2340
2398
|
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2341
2399
|
|
|
2342
|
-
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
|
|
2400
|
+
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, SYSTEM_RECORD_ID)
|
|
2343
2401
|
|
|
2344
|
-
self.df_with_original_index = df.copy()
|
|
2345
2402
|
df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
|
|
2346
2403
|
|
|
2347
2404
|
combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
|
|
@@ -2349,14 +2406,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2349
2406
|
dataset = Dataset(
|
|
2350
2407
|
"tds_" + str(uuid.uuid4()),
|
|
2351
2408
|
df=df,
|
|
2409
|
+
meaning_types=meaning_types,
|
|
2410
|
+
search_keys=combined_search_keys,
|
|
2411
|
+
unnest_search_keys=unnest_search_keys,
|
|
2352
2412
|
model_task_type=model_task_type,
|
|
2353
2413
|
date_format=self.date_format,
|
|
2354
2414
|
random_state=self.random_state,
|
|
2355
2415
|
rest_client=self.rest_client,
|
|
2356
2416
|
logger=self.logger,
|
|
2357
2417
|
)
|
|
2358
|
-
dataset.meaning_types = meaning_types
|
|
2359
|
-
dataset.search_keys = combined_search_keys
|
|
2360
2418
|
if email_converted_to_hem:
|
|
2361
2419
|
dataset.ignore_columns = [email_column]
|
|
2362
2420
|
|
|
@@ -2726,9 +2784,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2726
2784
|
X: pd.DataFrame, y: pd.Series, cv: Optional[CVType]
|
|
2727
2785
|
) -> Tuple[pd.DataFrame, pd.Series]:
|
|
2728
2786
|
if cv not in [CVType.time_series, CVType.blocked_time_series]:
|
|
2787
|
+
record_id_column = ENTITY_SYSTEM_RECORD_ID if ENTITY_SYSTEM_RECORD_ID in X else SYSTEM_RECORD_ID
|
|
2729
2788
|
Xy = X.copy()
|
|
2730
2789
|
Xy[TARGET] = y
|
|
2731
|
-
Xy = Xy.sort_values(by=
|
|
2790
|
+
Xy = Xy.sort_values(by=record_id_column).reset_index(drop=True)
|
|
2732
2791
|
X = Xy.drop(columns=TARGET)
|
|
2733
2792
|
y = Xy[TARGET].copy()
|
|
2734
2793
|
|
|
@@ -2905,15 +2964,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2905
2964
|
|
|
2906
2965
|
@staticmethod
|
|
2907
2966
|
def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2908
|
-
for col, t in search_keys.items()
|
|
2909
|
-
|
|
2910
|
-
|
|
2967
|
+
cols = [col for col, t in search_keys.items() if t == SearchKey.EMAIL]
|
|
2968
|
+
if len(cols) > 1:
|
|
2969
|
+
raise Exception("More than one email column found after unnest")
|
|
2970
|
+
if len(cols) == 1:
|
|
2971
|
+
return cols[0]
|
|
2911
2972
|
|
|
2912
2973
|
@staticmethod
|
|
2913
2974
|
def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2914
|
-
for col, t in search_keys.items()
|
|
2915
|
-
|
|
2916
|
-
|
|
2975
|
+
cols = [col for col, t in search_keys.items() if t == SearchKey.HEM]
|
|
2976
|
+
if len(cols) > 1:
|
|
2977
|
+
raise Exception("More than one hem column found after unnest")
|
|
2978
|
+
if len(cols) == 1:
|
|
2979
|
+
return cols[0]
|
|
2917
2980
|
|
|
2918
2981
|
@staticmethod
|
|
2919
2982
|
def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
@@ -2921,8 +2984,44 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2921
2984
|
if t == SearchKey.PHONE:
|
|
2922
2985
|
return col
|
|
2923
2986
|
|
|
2987
|
+
def _explode_multiple_search_keys(
|
|
2988
|
+
self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]
|
|
2989
|
+
) -> Tuple[pd.DataFrame, Dict[str, List[str]]]:
|
|
2990
|
+
# find groups of multiple search keys
|
|
2991
|
+
search_key_names_by_type: Dict[SearchKey, str] = {}
|
|
2992
|
+
for key_name, key_type in search_keys.items():
|
|
2993
|
+
search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
|
|
2994
|
+
search_key_names_by_type = {
|
|
2995
|
+
key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
|
|
2996
|
+
}
|
|
2997
|
+
if len(search_key_names_by_type) == 0:
|
|
2998
|
+
return df, {}
|
|
2999
|
+
|
|
3000
|
+
multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
|
|
3001
|
+
other_columns = [col for col in df.columns if col not in multiple_keys_columns]
|
|
3002
|
+
exploded_dfs = []
|
|
3003
|
+
unnest_search_keys = {}
|
|
3004
|
+
|
|
3005
|
+
for key_type, key_names in search_key_names_by_type.items():
|
|
3006
|
+
new_search_key = f"upgini_{key_type.name.lower()}_unnest"
|
|
3007
|
+
exploded_df = pd.melt(
|
|
3008
|
+
df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
|
|
3009
|
+
)
|
|
3010
|
+
exploded_dfs.append(exploded_df)
|
|
3011
|
+
for old_key in key_names:
|
|
3012
|
+
del search_keys[old_key]
|
|
3013
|
+
search_keys[new_search_key] = key_type
|
|
3014
|
+
unnest_search_keys[new_search_key] = key_names
|
|
3015
|
+
|
|
3016
|
+
df = pd.concat(exploded_dfs, ignore_index=True)
|
|
3017
|
+
return df, unnest_search_keys
|
|
3018
|
+
|
|
2924
3019
|
def __add_fit_system_record_id(
|
|
2925
|
-
self,
|
|
3020
|
+
self,
|
|
3021
|
+
df: pd.DataFrame,
|
|
3022
|
+
meaning_types: Dict[str, FileColumnMeaningType],
|
|
3023
|
+
search_keys: Dict[str, SearchKey],
|
|
3024
|
+
id_name: str,
|
|
2926
3025
|
) -> pd.DataFrame:
|
|
2927
3026
|
# save original order or rows
|
|
2928
3027
|
original_index_name = df.index.name
|
|
@@ -2971,14 +3070,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2971
3070
|
|
|
2972
3071
|
df = df.reset_index(drop=True).reset_index()
|
|
2973
3072
|
# system_record_id saves correct order for fit
|
|
2974
|
-
df = df.rename(columns={DEFAULT_INDEX:
|
|
3073
|
+
df = df.rename(columns={DEFAULT_INDEX: id_name})
|
|
2975
3074
|
|
|
2976
3075
|
# return original order
|
|
2977
3076
|
df = df.set_index(ORIGINAL_INDEX)
|
|
2978
3077
|
df.index.name = original_index_name
|
|
2979
3078
|
df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
|
|
2980
3079
|
|
|
2981
|
-
meaning_types[
|
|
3080
|
+
meaning_types[id_name] = (
|
|
3081
|
+
FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
3082
|
+
if id_name == SYSTEM_RECORD_ID
|
|
3083
|
+
else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
3084
|
+
)
|
|
2982
3085
|
return df
|
|
2983
3086
|
|
|
2984
3087
|
def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -3033,7 +3136,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3033
3136
|
)
|
|
3034
3137
|
|
|
3035
3138
|
comparing_columns = X.columns if is_transform else df_with_original_index.columns
|
|
3036
|
-
dup_features = [
|
|
3139
|
+
dup_features = [
|
|
3140
|
+
c
|
|
3141
|
+
for c in comparing_columns
|
|
3142
|
+
if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
3143
|
+
]
|
|
3037
3144
|
if len(dup_features) > 0:
|
|
3038
3145
|
self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
|
|
3039
3146
|
raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
|
|
@@ -3044,8 +3151,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3044
3151
|
result_features = pd.merge(
|
|
3045
3152
|
df_with_original_index,
|
|
3046
3153
|
result_features,
|
|
3047
|
-
|
|
3048
|
-
right_on=SYSTEM_RECORD_ID,
|
|
3154
|
+
on=ENTITY_SYSTEM_RECORD_ID,
|
|
3049
3155
|
how="left" if is_transform else "inner",
|
|
3050
3156
|
)
|
|
3051
3157
|
result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
|
|
@@ -3056,7 +3162,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3056
3162
|
result_features = result_features[~result_features[SYSTEM_RECORD_ID].isin(rows_to_drop[SYSTEM_RECORD_ID])]
|
|
3057
3163
|
self.logger.info(f"After dropping target outliers size: {len(result_features)}")
|
|
3058
3164
|
|
|
3059
|
-
result_eval_sets =
|
|
3165
|
+
result_eval_sets = {}
|
|
3060
3166
|
if not is_transform and EVAL_SET_INDEX in result_features.columns:
|
|
3061
3167
|
result_train_features = result_features.loc[result_features[EVAL_SET_INDEX] == 0].copy()
|
|
3062
3168
|
eval_set_indices = list(result_features[EVAL_SET_INDEX].unique())
|
|
@@ -3262,7 +3368,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3262
3368
|
if autofe_feature.op.is_vector:
|
|
3263
3369
|
continue
|
|
3264
3370
|
|
|
3265
|
-
description =
|
|
3371
|
+
description = {}
|
|
3266
3372
|
|
|
3267
3373
|
feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True))
|
|
3268
3374
|
if feature_meta is None:
|
|
@@ -3428,13 +3534,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3428
3534
|
self.warning_counter.increment()
|
|
3429
3535
|
|
|
3430
3536
|
if len(valid_search_keys) == 1:
|
|
3431
|
-
|
|
3432
|
-
|
|
3433
|
-
|
|
3434
|
-
|
|
3435
|
-
|
|
3436
|
-
|
|
3437
|
-
|
|
3537
|
+
key, value = list(valid_search_keys.items())[0]
|
|
3538
|
+
# Show warning for country only if country is the only key
|
|
3539
|
+
if x[key].nunique() == 1:
|
|
3540
|
+
msg = self.bundle.get("single_constant_search_key").format(value, x[key].values[0])
|
|
3541
|
+
print(msg)
|
|
3542
|
+
self.logger.warning(msg)
|
|
3543
|
+
self.warning_counter.increment()
|
|
3438
3544
|
|
|
3439
3545
|
self.logger.info(f"Prepared search keys: {valid_search_keys}")
|
|
3440
3546
|
|
|
@@ -3544,61 +3650,68 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3544
3650
|
def check_need_detect(search_key: SearchKey):
|
|
3545
3651
|
return not is_transform or search_key in self.fit_search_keys.values()
|
|
3546
3652
|
|
|
3547
|
-
if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
|
3548
|
-
|
|
3549
|
-
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3653
|
+
# if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
|
3654
|
+
if check_need_detect(SearchKey.POSTAL_CODE):
|
|
3655
|
+
maybe_keys = PostalCodeSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3656
|
+
if maybe_keys:
|
|
3657
|
+
new_keys = {key: SearchKey.POSTAL_CODE for key in maybe_keys}
|
|
3658
|
+
search_keys.update(new_keys)
|
|
3659
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3660
|
+
self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
|
|
3553
3661
|
if not silent_mode:
|
|
3554
|
-
print(self.bundle.get("postal_code_detected").format(
|
|
3662
|
+
print(self.bundle.get("postal_code_detected").format(maybe_keys))
|
|
3555
3663
|
|
|
3556
3664
|
if (
|
|
3557
3665
|
SearchKey.COUNTRY not in search_keys.values()
|
|
3558
3666
|
and self.country_code is None
|
|
3559
3667
|
and check_need_detect(SearchKey.COUNTRY)
|
|
3560
3668
|
):
|
|
3561
|
-
maybe_key = CountrySearchKeyDetector().
|
|
3562
|
-
if maybe_key
|
|
3563
|
-
search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3564
|
-
self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3669
|
+
maybe_key = CountrySearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3670
|
+
if maybe_key:
|
|
3671
|
+
search_keys[maybe_key[0]] = SearchKey.COUNTRY
|
|
3672
|
+
self.autodetected_search_keys[maybe_key[0]] = SearchKey.COUNTRY
|
|
3565
3673
|
self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
|
|
3566
3674
|
if not silent_mode:
|
|
3567
3675
|
print(self.bundle.get("country_detected").format(maybe_key))
|
|
3568
3676
|
|
|
3569
3677
|
if (
|
|
3570
|
-
SearchKey.EMAIL not in search_keys.values()
|
|
3571
|
-
|
|
3678
|
+
# SearchKey.EMAIL not in search_keys.values()
|
|
3679
|
+
SearchKey.HEM not in search_keys.values()
|
|
3572
3680
|
and check_need_detect(SearchKey.HEM)
|
|
3573
3681
|
):
|
|
3574
|
-
|
|
3575
|
-
if
|
|
3682
|
+
maybe_keys = EmailSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3683
|
+
if maybe_keys:
|
|
3576
3684
|
if self.__is_registered or is_demo_dataset:
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
self.
|
|
3685
|
+
new_keys = {key: SearchKey.EMAIL for key in maybe_keys}
|
|
3686
|
+
search_keys.update(new_keys)
|
|
3687
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3688
|
+
self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
|
|
3580
3689
|
if not silent_mode:
|
|
3581
|
-
print(self.bundle.get("email_detected").format(
|
|
3690
|
+
print(self.bundle.get("email_detected").format(maybe_keys))
|
|
3582
3691
|
else:
|
|
3583
3692
|
self.logger.warning(
|
|
3584
|
-
f"Autodetected search key EMAIL in column {
|
|
3693
|
+
f"Autodetected search key EMAIL in column {maybe_keys}."
|
|
3694
|
+
" But not used because not registered user"
|
|
3585
3695
|
)
|
|
3586
3696
|
if not silent_mode:
|
|
3587
|
-
print(self.bundle.get("email_detected_not_registered").format(
|
|
3697
|
+
print(self.bundle.get("email_detected_not_registered").format(maybe_keys))
|
|
3588
3698
|
self.warning_counter.increment()
|
|
3589
3699
|
|
|
3590
|
-
if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
|
|
3591
|
-
|
|
3592
|
-
|
|
3700
|
+
# if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
|
|
3701
|
+
if check_need_detect(SearchKey.PHONE):
|
|
3702
|
+
maybe_keys = PhoneSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3703
|
+
if maybe_keys:
|
|
3593
3704
|
if self.__is_registered or is_demo_dataset:
|
|
3594
|
-
|
|
3595
|
-
|
|
3596
|
-
self.
|
|
3705
|
+
new_keys = {key: SearchKey.PHONE for key in maybe_keys}
|
|
3706
|
+
search_keys.update(new_keys)
|
|
3707
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3708
|
+
self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
|
|
3597
3709
|
if not silent_mode:
|
|
3598
|
-
print(self.bundle.get("phone_detected").format(
|
|
3710
|
+
print(self.bundle.get("phone_detected").format(maybe_keys))
|
|
3599
3711
|
else:
|
|
3600
3712
|
self.logger.warning(
|
|
3601
|
-
f"Autodetected search key PHONE in column {
|
|
3713
|
+
f"Autodetected search key PHONE in column {maybe_keys}. "
|
|
3714
|
+
"But not used because not registered user"
|
|
3602
3715
|
)
|
|
3603
3716
|
if not silent_mode:
|
|
3604
3717
|
print(self.bundle.get("phone_detected_not_registered"))
|
upgini/metadata.py
CHANGED
|
@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Set
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
SYSTEM_RECORD_ID = "system_record_id"
|
|
7
|
+
ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
|
|
8
|
+
SEARCH_KEY_UNNEST = "search_key_unnest"
|
|
7
9
|
SORT_ID = "sort_id"
|
|
8
10
|
EVAL_SET_INDEX = "eval_set_index"
|
|
9
11
|
TARGET = "target"
|
|
@@ -11,7 +13,7 @@ COUNTRY = "country_iso_code"
|
|
|
11
13
|
RENAMED_INDEX = "index_col"
|
|
12
14
|
DEFAULT_INDEX = "index"
|
|
13
15
|
ORIGINAL_INDEX = "original_index"
|
|
14
|
-
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY
|
|
16
|
+
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class FileColumnMeaningType(Enum):
|
|
@@ -37,6 +39,8 @@ class FileColumnMeaningType(Enum):
|
|
|
37
39
|
POSTAL_CODE = "POSTAL_CODE"
|
|
38
40
|
SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
|
|
39
41
|
EVAL_SET_INDEX = "EVAL_SET_INDEX"
|
|
42
|
+
ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
|
|
43
|
+
UNNEST_KEY = "UNNEST_KEY"
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
class SearchKey(Enum):
|
|
@@ -182,6 +186,10 @@ class FileColumnMetadata(BaseModel):
|
|
|
182
186
|
meaningType: FileColumnMeaningType
|
|
183
187
|
minMaxValues: Optional[NumericInterval] = None
|
|
184
188
|
originalName: Optional[str]
|
|
189
|
+
# is this column contains keys from multiple key columns like msisdn1, msisdn2
|
|
190
|
+
isUnnest: bool = False
|
|
191
|
+
# list of original etalon key column names like msisdn1, msisdn2
|
|
192
|
+
unnestKeyNames: Optional[list[str]]
|
|
185
193
|
|
|
186
194
|
|
|
187
195
|
class FileMetadata(BaseModel):
|
|
@@ -276,7 +284,7 @@ class FeaturesFilter(BaseModel):
|
|
|
276
284
|
|
|
277
285
|
|
|
278
286
|
class RuntimeParameters(BaseModel):
|
|
279
|
-
properties: Dict[str, str] =
|
|
287
|
+
properties: Dict[str, str] = {}
|
|
280
288
|
|
|
281
289
|
|
|
282
290
|
class SearchCustomization(BaseModel):
|
upgini/metrics.py
CHANGED
|
@@ -357,7 +357,7 @@ class EstimatorWrapper:
|
|
|
357
357
|
"logger": logger,
|
|
358
358
|
}
|
|
359
359
|
if estimator is None:
|
|
360
|
-
params =
|
|
360
|
+
params = {}
|
|
361
361
|
# if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
|
|
362
362
|
# params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
|
|
363
363
|
if target_type == ModelTaskType.MULTICLASS:
|
|
@@ -88,6 +88,7 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
|
88
88
|
search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
|
|
89
89
|
empty_search_key=Search key {} is empty. Please fill values or remove this search key
|
|
90
90
|
single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
|
|
91
|
+
unsupported_multi_key=Search key {} cannot be used multiple times
|
|
91
92
|
unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
|
|
92
93
|
date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
|
|
93
94
|
invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
@@ -10,16 +10,18 @@ class BaseSearchKeyDetector:
|
|
|
10
10
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
11
|
raise NotImplementedError()
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
|
|
14
|
+
return [
|
|
15
|
+
column_name
|
|
16
|
+
for column_name in column_names
|
|
17
|
+
if self._is_search_key_by_name(column_name)
|
|
18
|
+
]
|
|
17
19
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
for column_name in df.columns:
|
|
20
|
+
def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
|
|
21
|
+
other_columns = [col for col in df.columns if col not in existing_search_keys]
|
|
22
|
+
columns_by_names = self._get_search_keys_by_name(other_columns)
|
|
23
|
+
columns_by_values = []
|
|
24
|
+
for column_name in other_columns:
|
|
24
25
|
if self._is_search_key_by_values(df[column_name]):
|
|
25
|
-
|
|
26
|
+
columns_by_values.append(column_name)
|
|
27
|
+
return list(set(columns_by_names + columns_by_values))
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -126,9 +126,9 @@ class DateTimeSearchKeyConverter:
|
|
|
126
126
|
df.drop(columns=seconds, inplace=True)
|
|
127
127
|
|
|
128
128
|
if keep_time:
|
|
129
|
-
df[self.DATETIME_COL] = df[self.date_column].
|
|
129
|
+
df[self.DATETIME_COL] = df[self.date_column].astype(np.int64) // 1_000_000
|
|
130
130
|
df[self.DATETIME_COL] = df[self.DATETIME_COL].apply(self._int_to_opt).astype("Int64")
|
|
131
|
-
df[self.date_column] = df[self.date_column].dt.floor("D").
|
|
131
|
+
df[self.date_column] = df[self.date_column].dt.floor("D").astype(np.int64) // 1_000_000
|
|
132
132
|
df[self.date_column] = df[self.date_column].apply(self._int_to_opt).astype("Int64")
|
|
133
133
|
|
|
134
134
|
self.logger.info(f"Date after convertion to timestamp: {df[self.date_column]}")
|
|
@@ -3,7 +3,15 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import
|
|
6
|
+
from upgini.metadata import (
|
|
7
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
8
|
+
EVAL_SET_INDEX,
|
|
9
|
+
SORT_ID,
|
|
10
|
+
SYSTEM_RECORD_ID,
|
|
11
|
+
TARGET,
|
|
12
|
+
ModelTaskType,
|
|
13
|
+
SearchKey,
|
|
14
|
+
)
|
|
7
15
|
from upgini.resource_bundle import ResourceBundle
|
|
8
16
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
9
17
|
from upgini.utils.target_utils import define_task
|
|
@@ -143,6 +151,8 @@ def clean_full_duplicates(
|
|
|
143
151
|
unique_columns = df.columns.tolist()
|
|
144
152
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
145
153
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
154
|
+
if ENTITY_SYSTEM_RECORD_ID in unique_columns:
|
|
155
|
+
unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
|
|
146
156
|
if SORT_ID in unique_columns:
|
|
147
157
|
unique_columns.remove(SORT_ID)
|
|
148
158
|
if EVAL_SET_INDEX in unique_columns:
|
upgini/utils/email_utils.py
CHANGED
|
@@ -38,11 +38,13 @@ class EmailSearchKeyConverter:
|
|
|
38
38
|
email_column: str,
|
|
39
39
|
hem_column: Optional[str],
|
|
40
40
|
search_keys: Dict[str, SearchKey],
|
|
41
|
+
unnest_search_keys: Optional[List[str]] = None,
|
|
41
42
|
logger: Optional[logging.Logger] = None,
|
|
42
43
|
):
|
|
43
44
|
self.email_column = email_column
|
|
44
45
|
self.hem_column = hem_column
|
|
45
46
|
self.search_keys = search_keys
|
|
47
|
+
self.unnest_search_keys = unnest_search_keys
|
|
46
48
|
if logger is not None:
|
|
47
49
|
self.logger = logger
|
|
48
50
|
else:
|
|
@@ -80,9 +82,12 @@ class EmailSearchKeyConverter:
|
|
|
80
82
|
del self.search_keys[self.email_column]
|
|
81
83
|
return df
|
|
82
84
|
self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
|
|
85
|
+
self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
|
|
83
86
|
self.email_converted_to_hem = True
|
|
84
87
|
|
|
85
88
|
del self.search_keys[self.email_column]
|
|
89
|
+
if self.email_column in self.unnest_search_keys:
|
|
90
|
+
self.unnest_search_keys.remove(self.email_column)
|
|
86
91
|
|
|
87
92
|
df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
|
|
88
93
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=qdIxHiDGZT_iNTBswNeIuc9TPfvUlNqvSmRqMyigZBM,46187
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
-
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
5
|
+
upgini/features_enricher.py,sha256=i6Peb4ws4IyZNRKPj8tO8gO-RI1K2xfLX9zDqkNH0bQ,181799
|
|
7
6
|
upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
|
|
8
|
-
upgini/metadata.py,sha256=
|
|
9
|
-
upgini/metrics.py,sha256=
|
|
7
|
+
upgini/metadata.py,sha256=TNZbtIuxYkBFGQu3gGm2flA6vsKyUPN4Q-Du3fFjmSM,10101
|
|
8
|
+
upgini/metrics.py,sha256=YhyPik38cBI5x5KfdiE_qocJnUjZbSqUj8GUtCqnG0g,29648
|
|
10
9
|
upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
|
|
11
10
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
12
11
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
@@ -29,22 +28,22 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
29
28
|
upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
|
|
30
29
|
upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
|
|
31
30
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
32
|
-
upgini/resource_bundle/strings.properties,sha256
|
|
31
|
+
upgini/resource_bundle/strings.properties,sha256=-JDIa0nAoA5utK7UZZAUgLDsozJNI08dDcbIaOSsvQg,26353
|
|
33
32
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
34
33
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
34
|
upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
|
|
36
35
|
upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
|
|
37
36
|
upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
38
37
|
upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
|
|
39
|
-
upgini/utils/base_search_key_detector.py,sha256=
|
|
38
|
+
upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
|
|
40
39
|
upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
|
|
41
40
|
upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
|
|
42
41
|
upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
|
|
43
42
|
upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
|
|
44
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
45
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
43
|
+
upgini/utils/datetime_utils.py,sha256=RW9eGCGQyYBsIU9XbYKt4hQiXUNppb4Grszg4EdKeY4,10398
|
|
44
|
+
upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
|
|
46
45
|
upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
|
|
47
|
-
upgini/utils/email_utils.py,sha256=
|
|
46
|
+
upgini/utils/email_utils.py,sha256=KHqIUagBWd3jOj3V7mW0ZkBOc-2XzAIA3p1xxZgy-L4,3813
|
|
48
47
|
upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
|
|
49
48
|
upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
|
|
50
49
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
@@ -56,8 +55,8 @@ upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,4
|
|
|
56
55
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
57
56
|
upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
|
|
58
57
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
58
|
+
upgini-1.1.278a2.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
59
|
+
upgini-1.1.278a2.dist-info/METADATA,sha256=Ru3Yqgq1AgTr2H-1cupVIsnwAX0pqta2q4fLVd6kdHc,48158
|
|
60
|
+
upgini-1.1.278a2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
61
|
+
upgini-1.1.278a2.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
62
|
+
upgini-1.1.278a2.dist-info/RECORD,,
|
upgini/fingerprint.js
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
|
|
3
|
-
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
|
4
|
-
*
|
|
5
|
-
* This software contains code from open-source projects:
|
|
6
|
-
* MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
|
|
7
|
-
*/
|
|
8
|
-
var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="leaderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocmVmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
|
|
File without changes
|
|
File without changes
|
|
File without changes
|