upgini 1.1.274a3388.post2__py3-none-any.whl → 1.1.275a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/autofe/date.py +2 -17
- upgini/autofe/feature.py +1 -70
- upgini/autofe/operand.py +2 -10
- upgini/dataset.py +10 -2
- upgini/features_enricher.py +199 -92
- upgini/fingerprint.js +8 -0
- upgini/metadata.py +9 -1
- upgini/resource_bundle/strings.properties +1 -0
- upgini/utils/base_search_key_detector.py +14 -12
- upgini/utils/deduplicate_utils.py +11 -1
- upgini/utils/email_utils.py +5 -0
- {upgini-1.1.274a3388.post2.dist-info → upgini-1.1.275a1.dist-info}/METADATA +1 -1
- {upgini-1.1.274a3388.post2.dist-info → upgini-1.1.275a1.dist-info}/RECORD +16 -15
- {upgini-1.1.274a3388.post2.dist-info → upgini-1.1.275a1.dist-info}/LICENSE +0 -0
- {upgini-1.1.274a3388.post2.dist-info → upgini-1.1.275a1.dist-info}/WHEEL +0 -0
- {upgini-1.1.274a3388.post2.dist-info → upgini-1.1.275a1.dist-info}/top_level.txt +0 -0
upgini/autofe/date.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from typing import Any,
|
|
1
|
+
from typing import Any, Optional, Union
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
|
-
from upgini.autofe.operand import
|
|
6
|
+
from upgini.autofe.operand import PandasOperand
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class DateDiffMixin(BaseModel):
|
|
@@ -24,7 +24,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
24
24
|
name = "date_diff"
|
|
25
25
|
is_binary = True
|
|
26
26
|
has_symmetry_importance = True
|
|
27
|
-
common_type = "date_diff"
|
|
28
27
|
|
|
29
28
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
30
29
|
left = self._convert_to_date(left, self.left_unit)
|
|
@@ -40,7 +39,6 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
|
|
|
40
39
|
name = "date_diff_type2"
|
|
41
40
|
is_binary = True
|
|
42
41
|
has_symmetry_importance = True
|
|
43
|
-
common_type = "date_diff"
|
|
44
42
|
|
|
45
43
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
46
44
|
left = self._convert_to_date(left, self.left_unit)
|
|
@@ -62,7 +60,6 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
62
60
|
is_binary = True
|
|
63
61
|
has_symmetry_importance = True
|
|
64
62
|
aggregation: str
|
|
65
|
-
common_type = "date_diff_list"
|
|
66
63
|
|
|
67
64
|
def __init__(self, **data: Any) -> None:
|
|
68
65
|
if "name" not in data:
|
|
@@ -89,9 +86,6 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
89
86
|
|
|
90
87
|
return method(x) if len(x) > 0 else default
|
|
91
88
|
|
|
92
|
-
def make_multi_operand(self, operands: List[Operand]) -> Optional[MultiOperand]:
|
|
93
|
-
return DateListDiffMulti(children=operands, aggregation="")
|
|
94
|
-
|
|
95
89
|
|
|
96
90
|
class DateListDiffBounded(DateListDiff):
|
|
97
91
|
lower_bound: Optional[int]
|
|
@@ -114,12 +108,3 @@ class DateListDiffBounded(DateListDiff):
|
|
|
114
108
|
def _agg(self, x):
|
|
115
109
|
x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
|
|
116
110
|
return super()._agg(x)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
class DateListDiffMulti(DateListDiff, MultiOperand):
|
|
120
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
121
|
-
left = self._convert_to_date(left, self.left_unit)
|
|
122
|
-
right = right.apply(lambda x: pd.arrays.DatetimeArray(self._convert_to_date(x, self.right_unit)))
|
|
123
|
-
|
|
124
|
-
diff = pd.Series(left - right.values).apply(self._diff)
|
|
125
|
-
return diff.apply(lambda x: [c._agg(x) for c in self.children])
|
upgini/autofe/feature.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import itertools
|
|
3
|
-
import operator
|
|
4
3
|
from typing import Dict, List, Optional, Tuple, Union
|
|
5
4
|
|
|
6
|
-
from more_itertools import map_reduce
|
|
7
5
|
import numpy as np
|
|
8
6
|
import pandas as pd
|
|
9
7
|
from pandas._typing import DtypeObj
|
|
10
8
|
|
|
11
9
|
from upgini.autofe.all_operands import find_op
|
|
12
|
-
from upgini.autofe.operand import
|
|
10
|
+
from upgini.autofe.operand import Operand, PandasOperand
|
|
13
11
|
|
|
14
12
|
|
|
15
13
|
class Column:
|
|
@@ -31,9 +29,6 @@ class Column:
|
|
|
31
29
|
else:
|
|
32
30
|
return feature_name[2:last_component_idx]
|
|
33
31
|
|
|
34
|
-
def get_display_name(self, **kwargs):
|
|
35
|
-
return self.name
|
|
36
|
-
|
|
37
32
|
def delete_data(self):
|
|
38
33
|
self.data = None
|
|
39
34
|
|
|
@@ -162,8 +157,6 @@ class Feature:
|
|
|
162
157
|
else:
|
|
163
158
|
new_data = new_data.replace([-np.inf, np.inf], np.nan)
|
|
164
159
|
|
|
165
|
-
new_data = new_data.rename(self.get_display_name())
|
|
166
|
-
|
|
167
160
|
if is_root:
|
|
168
161
|
self.data = new_data
|
|
169
162
|
return new_data
|
|
@@ -333,65 +326,3 @@ class FeatureGroup:
|
|
|
333
326
|
self.main_column_node.delete_data()
|
|
334
327
|
for child in self.children:
|
|
335
328
|
child.delete_data()
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
class OperandGroup:
|
|
339
|
-
def __init__(self, operand: MultiOperand, children: List[Union[Column, Feature]]):
|
|
340
|
-
self.op = operand
|
|
341
|
-
self.children = children
|
|
342
|
-
self.data: Optional[pd.DataFrame] = None
|
|
343
|
-
|
|
344
|
-
def get_columns(self, **kwargs) -> List[str]:
|
|
345
|
-
column_list = []
|
|
346
|
-
seen = set()
|
|
347
|
-
for child in self.children:
|
|
348
|
-
columns = child.get_columns(**kwargs)
|
|
349
|
-
column_list.extend([f for f in columns if f not in seen])
|
|
350
|
-
seen.update(columns)
|
|
351
|
-
return column_list
|
|
352
|
-
|
|
353
|
-
def get_display_names(self, **kwargs) -> List[str]:
|
|
354
|
-
names = [Feature(op, self.children).get_display_name(**kwargs) for op in self.op.children]
|
|
355
|
-
return names
|
|
356
|
-
|
|
357
|
-
def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
|
|
358
|
-
if isinstance(self.op, PandasOperand):
|
|
359
|
-
if self.op.is_vector:
|
|
360
|
-
ds = [child.calculate(data) for child in self.children]
|
|
361
|
-
new_data = self.op.calculate(data=ds)
|
|
362
|
-
else:
|
|
363
|
-
d1 = self.children[0].calculate(data)
|
|
364
|
-
d2 = None if len(self.children) < 2 else self.children[1].calculate(data)
|
|
365
|
-
new_data = self.op.calculate(data=d1, left=d1, right=d2)
|
|
366
|
-
|
|
367
|
-
new_data = pd.DataFrame(new_data.values.tolist())
|
|
368
|
-
new_data.columns = self.get_display_names()
|
|
369
|
-
else:
|
|
370
|
-
raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
|
|
371
|
-
|
|
372
|
-
if is_root:
|
|
373
|
-
self.data = new_data
|
|
374
|
-
return new_data
|
|
375
|
-
|
|
376
|
-
@staticmethod
|
|
377
|
-
def make_groups(candidates: List[Feature]) -> List[Union[Feature, "FeatureGroup"]]:
|
|
378
|
-
grouped_features = []
|
|
379
|
-
|
|
380
|
-
for _, features in sorted(
|
|
381
|
-
map_reduce(
|
|
382
|
-
candidates, lambda f: (f.op.common_type or "", ",".join([c.get_display_name() for c in f.children]))
|
|
383
|
-
).items(),
|
|
384
|
-
key=operator.itemgetter(0),
|
|
385
|
-
):
|
|
386
|
-
feature_list = list(features)
|
|
387
|
-
multi_op = feature_list[0].op.make_multi_operand([f.op for f in feature_list])
|
|
388
|
-
if multi_op is not None:
|
|
389
|
-
grouped_features.append(OperandGroup(multi_op, feature_list[0].children))
|
|
390
|
-
else:
|
|
391
|
-
grouped_features.extend(feature_list)
|
|
392
|
-
return grouped_features
|
|
393
|
-
|
|
394
|
-
def delete_data(self):
|
|
395
|
-
self.data = None
|
|
396
|
-
for child in self.children:
|
|
397
|
-
child.delete_data()
|
upgini/autofe/operand.py
CHANGED
|
@@ -5,9 +5,6 @@ import pandas as pd
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
MAIN_COLUMN = "main_column"
|
|
9
|
-
|
|
10
|
-
|
|
11
8
|
class Operand(BaseModel):
|
|
12
9
|
name: str
|
|
13
10
|
alias: Optional[str]
|
|
@@ -21,7 +18,6 @@ class Operand(BaseModel):
|
|
|
21
18
|
is_binary: bool = False
|
|
22
19
|
is_vector: bool = False
|
|
23
20
|
is_distribution_dependent: bool = False
|
|
24
|
-
common_type: Optional[str] = None
|
|
25
21
|
params: Optional[Dict[str, str]]
|
|
26
22
|
|
|
27
23
|
def set_params(self, params: Dict[str, str]):
|
|
@@ -31,8 +27,8 @@ class Operand(BaseModel):
|
|
|
31
27
|
def get_params(self) -> Dict[str, str]:
|
|
32
28
|
return self.params
|
|
33
29
|
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
|
|
31
|
+
MAIN_COLUMN = "main_column"
|
|
36
32
|
|
|
37
33
|
|
|
38
34
|
class PandasOperand(Operand, abc.ABC):
|
|
@@ -86,7 +82,3 @@ class VectorizableMixin(Operand):
|
|
|
86
82
|
value_columns = [col for col in input_columns if col != group_column]
|
|
87
83
|
|
|
88
84
|
return group_column, value_columns
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
class MultiOperand(Operand):
|
|
92
|
-
children: List[Operand]
|
upgini/dataset.py
CHANGED
|
@@ -22,7 +22,9 @@ from pandas.api.types import (
|
|
|
22
22
|
from upgini.errors import ValidationError
|
|
23
23
|
from upgini.http import ProgressStage, SearchProgress, _RestClient
|
|
24
24
|
from upgini.metadata import (
|
|
25
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
25
26
|
EVAL_SET_INDEX,
|
|
27
|
+
SEARCH_KEY_UNNEST,
|
|
26
28
|
SYSTEM_COLUMNS,
|
|
27
29
|
SYSTEM_RECORD_ID,
|
|
28
30
|
TARGET,
|
|
@@ -78,6 +80,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
78
80
|
path: Optional[str] = None,
|
|
79
81
|
meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
|
|
80
82
|
search_keys: Optional[List[Tuple[str, ...]]] = None,
|
|
83
|
+
unnest_search_keys: Optional[List[str]] = None,
|
|
81
84
|
model_task_type: Optional[ModelTaskType] = None,
|
|
82
85
|
random_state: Optional[int] = None,
|
|
83
86
|
rest_client: Optional[_RestClient] = None,
|
|
@@ -112,6 +115,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
112
115
|
self.description = description
|
|
113
116
|
self.meaning_types = meaning_types
|
|
114
117
|
self.search_keys = search_keys
|
|
118
|
+
self.unnest_search_keys = unnest_search_keys
|
|
115
119
|
self.ignore_columns = []
|
|
116
120
|
self.hierarchical_group_keys = []
|
|
117
121
|
self.hierarchical_subgroup_keys = []
|
|
@@ -171,7 +175,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
171
175
|
new_columns = []
|
|
172
176
|
dup_counter = 0
|
|
173
177
|
for column in self.data.columns:
|
|
174
|
-
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
|
|
178
|
+
if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
|
|
175
179
|
self.columns_renaming[column] = column
|
|
176
180
|
new_columns.append(column)
|
|
177
181
|
continue
|
|
@@ -352,7 +356,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
352
356
|
|
|
353
357
|
if is_string_dtype(self.data[postal_code]):
|
|
354
358
|
try:
|
|
355
|
-
self.data[postal_code] =
|
|
359
|
+
self.data[postal_code] = (
|
|
360
|
+
self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
|
|
361
|
+
)
|
|
356
362
|
except Exception:
|
|
357
363
|
pass
|
|
358
364
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -802,6 +808,8 @@ class Dataset: # (pd.DataFrame):
|
|
|
802
808
|
meaningType=meaning_type,
|
|
803
809
|
minMaxValues=min_max_values,
|
|
804
810
|
)
|
|
811
|
+
if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
|
|
812
|
+
column_meta.isUnnest = True
|
|
805
813
|
|
|
806
814
|
columns.append(column_meta)
|
|
807
815
|
|
upgini/features_enricher.py
CHANGED
|
@@ -10,6 +10,7 @@ import sys
|
|
|
10
10
|
import tempfile
|
|
11
11
|
import time
|
|
12
12
|
import uuid
|
|
13
|
+
from collections import Counter
|
|
13
14
|
from dataclasses import dataclass
|
|
14
15
|
from threading import Thread
|
|
15
16
|
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
|
@@ -43,9 +44,11 @@ from upgini.mdc import MDC
|
|
|
43
44
|
from upgini.metadata import (
|
|
44
45
|
COUNTRY,
|
|
45
46
|
DEFAULT_INDEX,
|
|
47
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
46
48
|
EVAL_SET_INDEX,
|
|
47
49
|
ORIGINAL_INDEX,
|
|
48
50
|
RENAMED_INDEX,
|
|
51
|
+
SEARCH_KEY_UNNEST,
|
|
49
52
|
SORT_ID,
|
|
50
53
|
SYSTEM_RECORD_ID,
|
|
51
54
|
TARGET,
|
|
@@ -1181,6 +1184,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1181
1184
|
search_keys = self.search_keys.copy()
|
|
1182
1185
|
search_keys = self.__prepare_search_keys(x, search_keys, is_demo_dataset, is_transform=True, silent_mode=True)
|
|
1183
1186
|
|
|
1187
|
+
unnest_search_keys = []
|
|
1188
|
+
|
|
1184
1189
|
extended_X = x.copy()
|
|
1185
1190
|
generated_features = []
|
|
1186
1191
|
date_column = self._get_date_column(search_keys)
|
|
@@ -1191,7 +1196,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1191
1196
|
email_column = self._get_email_column(search_keys)
|
|
1192
1197
|
hem_column = self._get_hem_column(search_keys)
|
|
1193
1198
|
if email_column:
|
|
1194
|
-
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
1199
|
+
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, unnest_search_keys, self.logger)
|
|
1195
1200
|
extended_X = converter.convert(extended_X)
|
|
1196
1201
|
generated_features.extend(converter.generated_features)
|
|
1197
1202
|
if (
|
|
@@ -1902,11 +1907,38 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1902
1907
|
generated_features.extend(converter.generated_features)
|
|
1903
1908
|
else:
|
|
1904
1909
|
self.logger.info("Input dataset hasn't date column")
|
|
1910
|
+
|
|
1911
|
+
# Don't pass all features in backend on transform
|
|
1912
|
+
original_features_for_transform = []
|
|
1913
|
+
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1914
|
+
features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
|
|
1915
|
+
if len(features_not_to_pass) > 0:
|
|
1916
|
+
# Pass only features that need for transform
|
|
1917
|
+
features_for_transform = self._search_task.get_features_for_transform()
|
|
1918
|
+
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1919
|
+
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1920
|
+
original_features_for_transform = [
|
|
1921
|
+
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1922
|
+
]
|
|
1923
|
+
|
|
1924
|
+
runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
|
|
1925
|
+
|
|
1926
|
+
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1927
|
+
|
|
1928
|
+
df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
|
|
1929
|
+
df[columns_for_system_record_id], index=False
|
|
1930
|
+
).astype("Float64")
|
|
1931
|
+
|
|
1932
|
+
# Explode multiple search keys
|
|
1933
|
+
df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
|
|
1934
|
+
|
|
1905
1935
|
email_column = self._get_email_column(search_keys)
|
|
1906
1936
|
hem_column = self._get_hem_column(search_keys)
|
|
1907
1937
|
email_converted_to_hem = False
|
|
1908
1938
|
if email_column:
|
|
1909
|
-
converter = EmailSearchKeyConverter(
|
|
1939
|
+
converter = EmailSearchKeyConverter(
|
|
1940
|
+
email_column, hem_column, search_keys, unnest_search_keys, self.logger
|
|
1941
|
+
)
|
|
1910
1942
|
df = converter.convert(df)
|
|
1911
1943
|
generated_features.extend(converter.generated_features)
|
|
1912
1944
|
email_converted_to_hem = converter.email_converted_to_hem
|
|
@@ -1920,30 +1952,21 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1920
1952
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1921
1953
|
|
|
1922
1954
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
1923
|
-
non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1955
|
+
# non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1956
|
+
for col in original_features_for_transform:
|
|
1957
|
+
meaning_types[col] = FileColumnMeaningType.FEATURE
|
|
1958
|
+
features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
|
|
1924
1959
|
|
|
1925
1960
|
if email_converted_to_hem:
|
|
1926
|
-
|
|
1961
|
+
features_not_to_pass.append(email_column)
|
|
1927
1962
|
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
runtime_parameters = self._get_copy_of_runtime_parameters()
|
|
1931
|
-
if len(non_keys_columns) > 0:
|
|
1932
|
-
# Pass only features that need for transform
|
|
1933
|
-
features_for_transform = self._search_task.get_features_for_transform()
|
|
1934
|
-
if features_for_transform is not None and len(features_for_transform) > 0:
|
|
1935
|
-
file_metadata = self._search_task.get_file_metadata(trace_id)
|
|
1936
|
-
original_features_for_transform = [
|
|
1937
|
-
c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
|
|
1938
|
-
]
|
|
1939
|
-
non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
|
|
1940
|
-
|
|
1941
|
-
runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
|
|
1963
|
+
features_not_to_pass = [c for c in features_not_to_pass if c not in original_features_for_transform]
|
|
1964
|
+
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
|
|
1942
1965
|
|
|
1943
1966
|
if add_fit_system_record_id:
|
|
1944
1967
|
df = self.__add_fit_system_record_id(df, dict(), search_keys)
|
|
1945
1968
|
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
|
1946
|
-
|
|
1969
|
+
features_not_to_pass.append(SORT_ID)
|
|
1947
1970
|
|
|
1948
1971
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
|
|
1949
1972
|
|
|
@@ -1951,16 +1974,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1951
1974
|
"Float64"
|
|
1952
1975
|
)
|
|
1953
1976
|
meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
1977
|
+
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
1978
|
+
if SEARCH_KEY_UNNEST in df.columns:
|
|
1979
|
+
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
1954
1980
|
|
|
1955
1981
|
df = df.reset_index(drop=True)
|
|
1956
|
-
system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
|
|
1982
|
+
system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
|
|
1957
1983
|
if add_fit_system_record_id:
|
|
1958
1984
|
system_columns_with_original_index.append(SORT_ID)
|
|
1959
1985
|
df_with_original_index = df[system_columns_with_original_index].copy()
|
|
1960
1986
|
|
|
1961
1987
|
combined_search_keys = combine_search_keys(search_keys.keys())
|
|
1962
1988
|
|
|
1963
|
-
df_without_features = df.drop(columns=
|
|
1989
|
+
df_without_features = df.drop(columns=features_not_to_pass)
|
|
1964
1990
|
|
|
1965
1991
|
df_without_features = clean_full_duplicates(
|
|
1966
1992
|
df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
|
|
@@ -2116,6 +2142,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2116
2142
|
|
|
2117
2143
|
key_types = search_keys.values()
|
|
2118
2144
|
|
|
2145
|
+
# Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
|
|
2146
|
+
multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
|
|
2147
|
+
for multi_key in multi_keys:
|
|
2148
|
+
if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
|
|
2149
|
+
msg = self.bundle.get("unsupported_multi_key").format(multi_key)
|
|
2150
|
+
self.logger.warning(msg)
|
|
2151
|
+
raise ValidationError(msg)
|
|
2152
|
+
|
|
2119
2153
|
if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
|
|
2120
2154
|
msg = self.bundle.get("date_and_datetime_simultanious")
|
|
2121
2155
|
self.logger.warning(msg)
|
|
@@ -2131,11 +2165,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2131
2165
|
self.logger.warning(msg)
|
|
2132
2166
|
raise ValidationError(msg)
|
|
2133
2167
|
|
|
2134
|
-
for key_type in SearchKey.__members__.values():
|
|
2135
|
-
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2168
|
+
# for key_type in SearchKey.__members__.values():
|
|
2169
|
+
# if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
|
|
2170
|
+
# msg = self.bundle.get("multiple_search_key").format(key_type)
|
|
2171
|
+
# self.logger.warning(msg)
|
|
2172
|
+
# raise ValidationError(msg)
|
|
2139
2173
|
|
|
2140
2174
|
# non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
|
|
2141
2175
|
# if (
|
|
@@ -2220,9 +2254,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2220
2254
|
self.fit_search_keys = self.search_keys.copy()
|
|
2221
2255
|
self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
|
|
2222
2256
|
|
|
2223
|
-
validate_dates_distribution(
|
|
2224
|
-
validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter
|
|
2225
|
-
)
|
|
2257
|
+
validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2226
2258
|
|
|
2227
2259
|
maybe_date_column = self._get_date_column(self.fit_search_keys)
|
|
2228
2260
|
has_date = maybe_date_column is not None
|
|
@@ -2273,14 +2305,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2273
2305
|
self.fit_generated_features.extend(converter.generated_features)
|
|
2274
2306
|
else:
|
|
2275
2307
|
self.logger.info("Input dataset hasn't date column")
|
|
2276
|
-
|
|
2277
|
-
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2278
|
-
email_converted_to_hem = False
|
|
2279
|
-
if email_column:
|
|
2280
|
-
converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
|
|
2281
|
-
df = converter.convert(df)
|
|
2282
|
-
self.fit_generated_features.extend(converter.generated_features)
|
|
2283
|
-
email_converted_to_hem = converter.email_converted_to_hem
|
|
2308
|
+
|
|
2284
2309
|
if (
|
|
2285
2310
|
self.detect_missing_search_keys
|
|
2286
2311
|
and list(self.fit_search_keys.values()) == [SearchKey.DATE]
|
|
@@ -2289,7 +2314,37 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2289
2314
|
converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
|
|
2290
2315
|
df = converter.convert(df)
|
|
2291
2316
|
|
|
2317
|
+
# Explode multiple search keys
|
|
2292
2318
|
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
|
|
2319
|
+
meaning_types = {
|
|
2320
|
+
**{col: key.value for col, key in self.fit_search_keys.items()},
|
|
2321
|
+
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2322
|
+
}
|
|
2323
|
+
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2324
|
+
if eval_set is not None and len(eval_set) > 0:
|
|
2325
|
+
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2326
|
+
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
|
|
2327
|
+
|
|
2328
|
+
# TODO check that this is correct for enrichment
|
|
2329
|
+
self.df_with_original_index = df.copy()
|
|
2330
|
+
|
|
2331
|
+
df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
|
|
2332
|
+
|
|
2333
|
+
# Convert EMAIL to HEM after unnesting to do it only with one column
|
|
2334
|
+
email_column = self._get_email_column(self.fit_search_keys)
|
|
2335
|
+
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2336
|
+
email_converted_to_hem = False
|
|
2337
|
+
if email_column:
|
|
2338
|
+
converter = EmailSearchKeyConverter(
|
|
2339
|
+
email_column, hem_column, self.fit_search_keys, unnest_search_keys, self.logger
|
|
2340
|
+
)
|
|
2341
|
+
df = converter.convert(df)
|
|
2342
|
+
self.fit_generated_features.extend(converter.generated_features)
|
|
2343
|
+
email_converted_to_hem = converter.email_converted_to_hem
|
|
2344
|
+
|
|
2345
|
+
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
|
|
2346
|
+
self.fit_search_keys.keys()
|
|
2347
|
+
)
|
|
2293
2348
|
if email_converted_to_hem:
|
|
2294
2349
|
non_feature_columns.append(email_column)
|
|
2295
2350
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
@@ -2313,12 +2368,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2313
2368
|
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2314
2369
|
}
|
|
2315
2370
|
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2371
|
+
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
2372
|
+
if SEARCH_KEY_UNNEST in df.columns:
|
|
2373
|
+
meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
|
|
2316
2374
|
if eval_set is not None and len(eval_set) > 0:
|
|
2317
2375
|
meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
|
|
2318
2376
|
|
|
2319
|
-
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
|
|
2377
|
+
df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, SYSTEM_RECORD_ID)
|
|
2320
2378
|
|
|
2321
|
-
self.df_with_original_index = df.copy()
|
|
2322
2379
|
df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
|
|
2323
2380
|
|
|
2324
2381
|
combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
|
|
@@ -2326,14 +2383,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2326
2383
|
dataset = Dataset(
|
|
2327
2384
|
"tds_" + str(uuid.uuid4()),
|
|
2328
2385
|
df=df,
|
|
2386
|
+
meaning_types=meaning_types,
|
|
2387
|
+
search_keys=combined_search_keys,
|
|
2388
|
+
unnest_search_keys=unnest_search_keys,
|
|
2329
2389
|
model_task_type=model_task_type,
|
|
2330
2390
|
date_format=self.date_format,
|
|
2331
2391
|
random_state=self.random_state,
|
|
2332
2392
|
rest_client=self.rest_client,
|
|
2333
2393
|
logger=self.logger,
|
|
2334
2394
|
)
|
|
2335
|
-
dataset.meaning_types = meaning_types
|
|
2336
|
-
dataset.search_keys = combined_search_keys
|
|
2337
2395
|
if email_converted_to_hem:
|
|
2338
2396
|
dataset.ignore_columns = [email_column]
|
|
2339
2397
|
|
|
@@ -2863,15 +2921,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2863
2921
|
|
|
2864
2922
|
@staticmethod
|
|
2865
2923
|
def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2866
|
-
for col, t in search_keys.items()
|
|
2867
|
-
|
|
2868
|
-
|
|
2924
|
+
cols = [col for col, t in search_keys.items() if t == SearchKey.EMAIL]
|
|
2925
|
+
if len(cols) > 1:
|
|
2926
|
+
raise Exception("More than one email column found after unnest")
|
|
2927
|
+
if len(cols) == 1:
|
|
2928
|
+
return cols[0]
|
|
2869
2929
|
|
|
2870
2930
|
@staticmethod
|
|
2871
2931
|
def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2872
|
-
for col, t in search_keys.items()
|
|
2873
|
-
|
|
2874
|
-
|
|
2932
|
+
cols = [col for col, t in search_keys.items() if t == SearchKey.HEM]
|
|
2933
|
+
if len(cols) > 1:
|
|
2934
|
+
raise Exception("More than one hem column found after unnest")
|
|
2935
|
+
if len(cols) == 1:
|
|
2936
|
+
return cols[0]
|
|
2875
2937
|
|
|
2876
2938
|
@staticmethod
|
|
2877
2939
|
def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
@@ -2879,8 +2941,42 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2879
2941
|
if t == SearchKey.PHONE:
|
|
2880
2942
|
return col
|
|
2881
2943
|
|
|
2944
|
+
def _explode_multiple_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
|
|
2945
|
+
# find groups of multiple search keys
|
|
2946
|
+
search_key_names_by_type: Dict[SearchKey, str] = dict()
|
|
2947
|
+
for key_name, key_type in search_keys.items():
|
|
2948
|
+
search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
|
|
2949
|
+
search_key_names_by_type = {
|
|
2950
|
+
key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
|
|
2951
|
+
}
|
|
2952
|
+
if len(search_key_names_by_type) == 0:
|
|
2953
|
+
return df, []
|
|
2954
|
+
|
|
2955
|
+
multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
|
|
2956
|
+
other_columns = [col for col in df.columns if col not in multiple_keys_columns]
|
|
2957
|
+
exploded_dfs = []
|
|
2958
|
+
unnest_search_keys = []
|
|
2959
|
+
|
|
2960
|
+
for key_type, key_names in search_key_names_by_type.items():
|
|
2961
|
+
new_search_key = f"upgini_{key_type.name.lower()}_unnest"
|
|
2962
|
+
exploded_df = pd.melt(
|
|
2963
|
+
df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
|
|
2964
|
+
)
|
|
2965
|
+
exploded_dfs.append(exploded_df)
|
|
2966
|
+
for old_key in key_names:
|
|
2967
|
+
del search_keys[old_key]
|
|
2968
|
+
search_keys[new_search_key] = key_type
|
|
2969
|
+
unnest_search_keys.append(new_search_key)
|
|
2970
|
+
|
|
2971
|
+
df = pd.concat(exploded_dfs, ignore_index=True)
|
|
2972
|
+
return df, unnest_search_keys
|
|
2973
|
+
|
|
2882
2974
|
def __add_fit_system_record_id(
|
|
2883
|
-
self,
|
|
2975
|
+
self,
|
|
2976
|
+
df: pd.DataFrame,
|
|
2977
|
+
meaning_types: Dict[str, FileColumnMeaningType],
|
|
2978
|
+
search_keys: Dict[str, SearchKey],
|
|
2979
|
+
id_name: str,
|
|
2884
2980
|
) -> pd.DataFrame:
|
|
2885
2981
|
# save original order or rows
|
|
2886
2982
|
original_index_name = df.index.name
|
|
@@ -2903,9 +2999,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2903
2999
|
[
|
|
2904
3000
|
c
|
|
2905
3001
|
for c in df.columns
|
|
2906
|
-
if c not in sort_columns
|
|
2907
|
-
and c not in sort_exclude_columns
|
|
2908
|
-
and df[c].nunique() > 1
|
|
3002
|
+
if c not in sort_columns and c not in sort_exclude_columns and df[c].nunique() > 1
|
|
2909
3003
|
]
|
|
2910
3004
|
# [
|
|
2911
3005
|
# sk
|
|
@@ -2931,14 +3025,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2931
3025
|
|
|
2932
3026
|
df = df.reset_index(drop=True).reset_index()
|
|
2933
3027
|
# system_record_id saves correct order for fit
|
|
2934
|
-
df = df.rename(columns={DEFAULT_INDEX:
|
|
3028
|
+
df = df.rename(columns={DEFAULT_INDEX: id_name})
|
|
2935
3029
|
|
|
2936
3030
|
# return original order
|
|
2937
3031
|
df = df.set_index(ORIGINAL_INDEX)
|
|
2938
3032
|
df.index.name = original_index_name
|
|
2939
3033
|
df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
|
|
2940
3034
|
|
|
2941
|
-
meaning_types[
|
|
3035
|
+
meaning_types[id_name] = (
|
|
3036
|
+
FileColumnMeaningType.SYSTEM_RECORD_ID
|
|
3037
|
+
if id_name == SYSTEM_RECORD_ID
|
|
3038
|
+
else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
3039
|
+
)
|
|
2942
3040
|
return df
|
|
2943
3041
|
|
|
2944
3042
|
def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -2993,7 +3091,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2993
3091
|
)
|
|
2994
3092
|
|
|
2995
3093
|
comparing_columns = X.columns if is_transform else df_with_original_index.columns
|
|
2996
|
-
dup_features = [
|
|
3094
|
+
dup_features = [
|
|
3095
|
+
c for c in comparing_columns
|
|
3096
|
+
if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
|
|
3097
|
+
]
|
|
2997
3098
|
if len(dup_features) > 0:
|
|
2998
3099
|
self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
|
|
2999
3100
|
raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
|
|
@@ -3004,8 +3105,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3004
3105
|
result_features = pd.merge(
|
|
3005
3106
|
df_with_original_index,
|
|
3006
3107
|
result_features,
|
|
3007
|
-
|
|
3008
|
-
right_on=SYSTEM_RECORD_ID,
|
|
3108
|
+
on=ENTITY_SYSTEM_RECORD_ID,
|
|
3009
3109
|
how="left" if is_transform else "inner",
|
|
3010
3110
|
)
|
|
3011
3111
|
result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
|
|
@@ -3385,13 +3485,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3385
3485
|
self.warning_counter.increment()
|
|
3386
3486
|
|
|
3387
3487
|
if len(valid_search_keys) == 1:
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3393
|
-
|
|
3394
|
-
|
|
3488
|
+
key, value = list(valid_search_keys.items())[0]
|
|
3489
|
+
# Show warning for country only if country is the only key
|
|
3490
|
+
if x[key].nunique() == 1:
|
|
3491
|
+
msg = self.bundle.get("single_constant_search_key").format(value, x[key].values[0])
|
|
3492
|
+
print(msg)
|
|
3493
|
+
self.logger.warning(msg)
|
|
3494
|
+
self.warning_counter.increment()
|
|
3395
3495
|
|
|
3396
3496
|
self.logger.info(f"Prepared search keys: {valid_search_keys}")
|
|
3397
3497
|
|
|
@@ -3501,61 +3601,68 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3501
3601
|
def check_need_detect(search_key: SearchKey):
|
|
3502
3602
|
return not is_transform or search_key in self.fit_search_keys.values()
|
|
3503
3603
|
|
|
3504
|
-
if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
|
3505
|
-
|
|
3506
|
-
|
|
3507
|
-
|
|
3508
|
-
|
|
3509
|
-
|
|
3604
|
+
# if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
|
|
3605
|
+
if check_need_detect(SearchKey.POSTAL_CODE):
|
|
3606
|
+
maybe_keys = PostalCodeSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3607
|
+
if maybe_keys:
|
|
3608
|
+
new_keys = {key: SearchKey.POSTAL_CODE for key in maybe_keys}
|
|
3609
|
+
search_keys.update(new_keys)
|
|
3610
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3611
|
+
self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
|
|
3510
3612
|
if not silent_mode:
|
|
3511
|
-
print(self.bundle.get("postal_code_detected").format(
|
|
3613
|
+
print(self.bundle.get("postal_code_detected").format(maybe_keys))
|
|
3512
3614
|
|
|
3513
3615
|
if (
|
|
3514
3616
|
SearchKey.COUNTRY not in search_keys.values()
|
|
3515
3617
|
and self.country_code is None
|
|
3516
3618
|
and check_need_detect(SearchKey.COUNTRY)
|
|
3517
3619
|
):
|
|
3518
|
-
maybe_key = CountrySearchKeyDetector().
|
|
3519
|
-
if maybe_key
|
|
3520
|
-
search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3521
|
-
self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
|
|
3620
|
+
maybe_key = CountrySearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3621
|
+
if maybe_key:
|
|
3622
|
+
search_keys[maybe_key[0]] = SearchKey.COUNTRY
|
|
3623
|
+
self.autodetected_search_keys[maybe_key[0]] = SearchKey.COUNTRY
|
|
3522
3624
|
self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
|
|
3523
3625
|
if not silent_mode:
|
|
3524
3626
|
print(self.bundle.get("country_detected").format(maybe_key))
|
|
3525
3627
|
|
|
3526
3628
|
if (
|
|
3527
|
-
SearchKey.EMAIL not in search_keys.values()
|
|
3528
|
-
|
|
3629
|
+
# SearchKey.EMAIL not in search_keys.values()
|
|
3630
|
+
SearchKey.HEM not in search_keys.values()
|
|
3529
3631
|
and check_need_detect(SearchKey.HEM)
|
|
3530
3632
|
):
|
|
3531
|
-
|
|
3532
|
-
if
|
|
3633
|
+
maybe_keys = EmailSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3634
|
+
if maybe_keys:
|
|
3533
3635
|
if self.__is_registered or is_demo_dataset:
|
|
3534
|
-
|
|
3535
|
-
|
|
3536
|
-
self.
|
|
3636
|
+
new_keys = {key: SearchKey.EMAIL for key in maybe_keys}
|
|
3637
|
+
search_keys.update(new_keys)
|
|
3638
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3639
|
+
self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
|
|
3537
3640
|
if not silent_mode:
|
|
3538
|
-
print(self.bundle.get("email_detected").format(
|
|
3641
|
+
print(self.bundle.get("email_detected").format(maybe_keys))
|
|
3539
3642
|
else:
|
|
3540
3643
|
self.logger.warning(
|
|
3541
|
-
f"Autodetected search key EMAIL in column {
|
|
3644
|
+
f"Autodetected search key EMAIL in column {maybe_keys}."
|
|
3645
|
+
" But not used because not registered user"
|
|
3542
3646
|
)
|
|
3543
3647
|
if not silent_mode:
|
|
3544
|
-
print(self.bundle.get("email_detected_not_registered").format(
|
|
3648
|
+
print(self.bundle.get("email_detected_not_registered").format(maybe_keys))
|
|
3545
3649
|
self.warning_counter.increment()
|
|
3546
3650
|
|
|
3547
|
-
if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
|
|
3548
|
-
|
|
3549
|
-
|
|
3651
|
+
# if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
|
|
3652
|
+
if check_need_detect(SearchKey.PHONE):
|
|
3653
|
+
maybe_keys = PhoneSearchKeyDetector().get_search_key_columns(sample, search_keys)
|
|
3654
|
+
if maybe_keys:
|
|
3550
3655
|
if self.__is_registered or is_demo_dataset:
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
self.
|
|
3656
|
+
new_keys = {key: SearchKey.PHONE for key in maybe_keys}
|
|
3657
|
+
search_keys.update(new_keys)
|
|
3658
|
+
self.autodetected_search_keys.update(new_keys)
|
|
3659
|
+
self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
|
|
3554
3660
|
if not silent_mode:
|
|
3555
|
-
print(self.bundle.get("phone_detected").format(
|
|
3661
|
+
print(self.bundle.get("phone_detected").format(maybe_keys))
|
|
3556
3662
|
else:
|
|
3557
3663
|
self.logger.warning(
|
|
3558
|
-
f"Autodetected search key PHONE in column {
|
|
3664
|
+
f"Autodetected search key PHONE in column {maybe_keys}. "
|
|
3665
|
+
"But not used because not registered user"
|
|
3559
3666
|
)
|
|
3560
3667
|
if not silent_mode:
|
|
3561
3668
|
print(self.bundle.get("phone_detected_not_registered"))
|
upgini/fingerprint.js
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
|
|
3
|
+
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
|
4
|
+
*
|
|
5
|
+
* This software contains code from open-source projects:
|
|
6
|
+
* MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
|
|
7
|
+
*/
|
|
8
|
+
var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return 
n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var 
t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return 
Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var 
e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 
10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return 
n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var 
n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="le
aderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly
94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: 
block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocm
VmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 
0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return 
navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var 
e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var 
e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return 
navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return 
e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
|
upgini/metadata.py
CHANGED
|
@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Set
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
SYSTEM_RECORD_ID = "system_record_id"
|
|
7
|
+
ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
|
|
8
|
+
SEARCH_KEY_UNNEST = "search_key_unnest"
|
|
7
9
|
SORT_ID = "sort_id"
|
|
8
10
|
EVAL_SET_INDEX = "eval_set_index"
|
|
9
11
|
TARGET = "target"
|
|
@@ -11,7 +13,7 @@ COUNTRY = "country_iso_code"
|
|
|
11
13
|
RENAMED_INDEX = "index_col"
|
|
12
14
|
DEFAULT_INDEX = "index"
|
|
13
15
|
ORIGINAL_INDEX = "original_index"
|
|
14
|
-
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY
|
|
16
|
+
SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class FileColumnMeaningType(Enum):
|
|
@@ -37,6 +39,8 @@ class FileColumnMeaningType(Enum):
|
|
|
37
39
|
POSTAL_CODE = "POSTAL_CODE"
|
|
38
40
|
SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
|
|
39
41
|
EVAL_SET_INDEX = "EVAL_SET_INDEX"
|
|
42
|
+
ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
|
|
43
|
+
UNNEST_KEY = "UNNEST_KEY"
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
class SearchKey(Enum):
|
|
@@ -182,6 +186,10 @@ class FileColumnMetadata(BaseModel):
|
|
|
182
186
|
meaningType: FileColumnMeaningType
|
|
183
187
|
minMaxValues: Optional[NumericInterval] = None
|
|
184
188
|
originalName: Optional[str]
|
|
189
|
+
# whether this column contains keys from multiple key columns, e.g. msisdn1 and msisdn2
|
|
190
|
+
isUnnest: bool = False,
|
|
191
|
+
# names of the original (reference) key columns, e.g. msisdn1 and msisdn2
|
|
192
|
+
unnestKeyNames: Optional[list[str]]
|
|
185
193
|
|
|
186
194
|
|
|
187
195
|
class FileMetadata(BaseModel):
|
|
@@ -87,6 +87,7 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
|
87
87
|
search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaneously. Parameter country_code will be ignored
|
|
88
88
|
empty_search_key=Search key {} is empty. Please fill values or remove this search key
|
|
89
89
|
single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
|
|
90
|
+
unsupported_multi_key=Search key {} cannot be used multiple times
|
|
90
91
|
unsupported_index_column=\nWARNING: Your column with name `index` was dropped because this name is reserved for system needs.
|
|
91
92
|
date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
|
|
92
93
|
invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List
|
|
1
|
+
from typing import List
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
@@ -10,16 +10,18 @@ class BaseSearchKeyDetector:
|
|
|
10
10
|
def _is_search_key_by_values(self, column: pd.Series) -> bool:
|
|
11
11
|
raise NotImplementedError()
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
|
|
14
|
+
return [
|
|
15
|
+
column_name
|
|
16
|
+
for column_name in column_names
|
|
17
|
+
if self._is_search_key_by_name(column_name)
|
|
18
|
+
]
|
|
17
19
|
|
|
18
|
-
def
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
for column_name in df.columns:
|
|
20
|
+
def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
|
|
21
|
+
other_columns = [col for col in df.columns if col not in existing_search_keys]
|
|
22
|
+
columns_by_names = self._get_search_keys_by_name(other_columns)
|
|
23
|
+
columns_by_values = []
|
|
24
|
+
for column_name in other_columns:
|
|
24
25
|
if self._is_search_key_by_values(df[column_name]):
|
|
25
|
-
|
|
26
|
+
columns_by_values.append(column_name)
|
|
27
|
+
return list(set(columns_by_names + columns_by_values))
|
|
@@ -3,7 +3,15 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import
|
|
6
|
+
from upgini.metadata import (
|
|
7
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
8
|
+
EVAL_SET_INDEX,
|
|
9
|
+
SORT_ID,
|
|
10
|
+
SYSTEM_RECORD_ID,
|
|
11
|
+
TARGET,
|
|
12
|
+
ModelTaskType,
|
|
13
|
+
SearchKey,
|
|
14
|
+
)
|
|
7
15
|
from upgini.resource_bundle import ResourceBundle
|
|
8
16
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
9
17
|
from upgini.utils.target_utils import define_task
|
|
@@ -143,6 +151,8 @@ def clean_full_duplicates(
|
|
|
143
151
|
unique_columns = df.columns.tolist()
|
|
144
152
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
145
153
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
154
|
+
if ENTITY_SYSTEM_RECORD_ID in unique_columns:
|
|
155
|
+
unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
|
|
146
156
|
if SORT_ID in unique_columns:
|
|
147
157
|
unique_columns.remove(SORT_ID)
|
|
148
158
|
if EVAL_SET_INDEX in unique_columns:
|
upgini/utils/email_utils.py
CHANGED
|
@@ -38,11 +38,13 @@ class EmailSearchKeyConverter:
|
|
|
38
38
|
email_column: str,
|
|
39
39
|
hem_column: Optional[str],
|
|
40
40
|
search_keys: Dict[str, SearchKey],
|
|
41
|
+
unnest_search_keys: Optional[List[str]] = None,
|
|
41
42
|
logger: Optional[logging.Logger] = None,
|
|
42
43
|
):
|
|
43
44
|
self.email_column = email_column
|
|
44
45
|
self.hem_column = hem_column
|
|
45
46
|
self.search_keys = search_keys
|
|
47
|
+
self.unnest_search_keys = unnest_search_keys
|
|
46
48
|
if logger is not None:
|
|
47
49
|
self.logger = logger
|
|
48
50
|
else:
|
|
@@ -80,9 +82,12 @@ class EmailSearchKeyConverter:
|
|
|
80
82
|
del self.search_keys[self.email_column]
|
|
81
83
|
return df
|
|
82
84
|
self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
|
|
85
|
+
self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
|
|
83
86
|
self.email_converted_to_hem = True
|
|
84
87
|
|
|
85
88
|
del self.search_keys[self.email_column]
|
|
89
|
+
if self.email_column in self.unnest_search_keys:
|
|
90
|
+
self.unnest_search_keys.remove(self.email_column)
|
|
86
91
|
|
|
87
92
|
df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
|
|
88
93
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
|
|
6
|
+
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
6
7
|
upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
|
|
7
|
-
upgini/metadata.py,sha256=
|
|
8
|
+
upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
|
|
8
9
|
upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
|
|
9
10
|
upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
|
|
10
11
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
@@ -14,10 +15,10 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
|
|
|
14
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
16
|
upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
|
|
16
17
|
upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
|
|
17
|
-
upgini/autofe/date.py,sha256=
|
|
18
|
-
upgini/autofe/feature.py,sha256=
|
|
18
|
+
upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
|
|
19
|
+
upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
|
|
19
20
|
upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
|
|
20
|
-
upgini/autofe/operand.py,sha256=
|
|
21
|
+
upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
|
|
21
22
|
upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
|
|
22
23
|
upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
|
|
23
24
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -28,22 +29,22 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
28
29
|
upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
|
|
29
30
|
upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
|
|
30
31
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
31
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
32
|
+
upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
|
|
32
33
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
33
34
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
35
|
upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
|
|
35
36
|
upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
|
|
36
37
|
upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
|
|
37
38
|
upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
|
|
38
|
-
upgini/utils/base_search_key_detector.py,sha256=
|
|
39
|
+
upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
|
|
39
40
|
upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
|
|
40
41
|
upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
|
|
41
42
|
upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
|
|
42
43
|
upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
|
|
43
44
|
upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
|
|
44
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
45
|
+
upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
|
|
45
46
|
upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
|
|
46
|
-
upgini/utils/email_utils.py,sha256=
|
|
47
|
+
upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
|
|
47
48
|
upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
|
|
48
49
|
upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
|
|
49
50
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
@@ -55,8 +56,8 @@ upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,4
|
|
|
55
56
|
upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
|
|
56
57
|
upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
|
|
57
58
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
58
|
-
upgini-1.1.
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
59
|
+
upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
60
|
+
upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
|
|
61
|
+
upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
62
|
+
upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
63
|
+
upgini-1.1.275a1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|