upgini 1.1.263__tar.gz → 1.1.263a1__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.263/src/upgini.egg-info → upgini-1.1.263a1}/PKG-INFO +1 -1
- {upgini-1.1.263 → upgini-1.1.263a1}/setup.py +1 -1
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/all_operands.py +0 -3
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/feature.py +1 -1
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/operand.py +0 -2
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/unary.py +8 -15
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/vector.py +3 -5
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/features_enricher.py +23 -8
- upgini-1.1.263a1/src/upgini/fingerprint.js +8 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/resource_bundle/strings.properties +2 -1
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/deduplicate_utils.py +61 -18
- {upgini-1.1.263 → upgini-1.1.263a1/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini.egg-info/SOURCES.txt +1 -2
- upgini-1.1.263/src/upgini/autofe/date.py +0 -53
- upgini-1.1.263/tests/test_autofe_operands.py +0 -27
- {upgini-1.1.263 → upgini-1.1.263a1}/LICENSE +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/README.md +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/pyproject.toml +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/setup.cfg +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/ads.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/dataset.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/errors.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/http.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/metadata.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/metrics.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/search_task.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/spinner.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_country_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_email_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_metrics.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_target_utils.py +0 -0
- {upgini-1.1.263 → upgini-1.1.263a1}/tests/test_widget.py +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
|
-
from upgini.autofe.date import DateDiff, DateDiffType2
|
|
3
2
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
4
3
|
from upgini.autofe.operand import Operand
|
|
5
4
|
from upgini.autofe.unary import Abs, Log, Residual, Sqrt, Square, Sigmoid, Floor, Freq
|
|
@@ -36,8 +35,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
36
35
|
Operand(name="GroupByThenNUnique", output_type="int", is_vectorizable=True, is_grouping=True),
|
|
37
36
|
Operand(name="GroupByThenFreq", output_type="float", is_grouping=True),
|
|
38
37
|
Sim(),
|
|
39
|
-
DateDiff(),
|
|
40
|
-
DateDiffType2(),
|
|
41
38
|
]
|
|
42
39
|
}
|
|
43
40
|
|
|
@@ -305,7 +305,7 @@ class FeatureGroup:
|
|
|
305
305
|
grouped_features = []
|
|
306
306
|
|
|
307
307
|
def groupby_func(f: Feature) -> Tuple[Operand, Union[Column, Feature]]:
|
|
308
|
-
return (f.op, f.children[0 if
|
|
308
|
+
return (f.op, f.children[0] if f.op.is_unary or f.op.is_vector else f.children[1])
|
|
309
309
|
|
|
310
310
|
for op_child, features in itertools.groupby(candidates, groupby_func):
|
|
311
311
|
op, main_child = op_child
|
|
@@ -73,8 +73,6 @@ class PandasOperand(Operand, abc.ABC):
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
class VectorizableMixin(Operand):
|
|
76
|
-
group_index: int = 1
|
|
77
|
-
|
|
78
76
|
def validate_calculation(self, input_columns: List[str], **kwargs) -> Tuple[str, List[str]]:
|
|
79
77
|
if not kwargs.get(MAIN_COLUMN):
|
|
80
78
|
raise ValueError(f"Expected argument {MAIN_COLUMN} for grouping operator {self.name} not found")
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
from upgini.autofe.operand import PandasOperand
|
|
1
|
+
from upgini.autofe.operand import PandasOperand
|
|
2
2
|
import numpy as np
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class Abs(PandasOperand
|
|
6
|
+
class Abs(PandasOperand):
|
|
7
7
|
name = "abs"
|
|
8
8
|
is_unary = True
|
|
9
9
|
is_vectorizable = True
|
|
10
|
-
group_index = 0
|
|
11
10
|
|
|
12
11
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
13
12
|
return data.abs()
|
|
@@ -16,12 +15,11 @@ class Abs(PandasOperand, VectorizableMixin):
|
|
|
16
15
|
return data.abs()
|
|
17
16
|
|
|
18
17
|
|
|
19
|
-
class Log(PandasOperand
|
|
18
|
+
class Log(PandasOperand):
|
|
20
19
|
name = "log"
|
|
21
20
|
is_unary = True
|
|
22
21
|
is_vectorizable = True
|
|
23
22
|
output_type = "float"
|
|
24
|
-
group_index = 0
|
|
25
23
|
|
|
26
24
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
27
25
|
return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
|
|
@@ -30,12 +28,11 @@ class Log(PandasOperand, VectorizableMixin):
|
|
|
30
28
|
return self._round_value(np.log(data.replace(0, np.nan).abs()), 10)
|
|
31
29
|
|
|
32
30
|
|
|
33
|
-
class Sqrt(PandasOperand
|
|
31
|
+
class Sqrt(PandasOperand):
|
|
34
32
|
name = "sqrt"
|
|
35
33
|
is_unary = True
|
|
36
34
|
is_vectorizable = True
|
|
37
35
|
output_type = "float"
|
|
38
|
-
group_index = 0
|
|
39
36
|
|
|
40
37
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
41
38
|
return self._round_value(np.sqrt(np.abs(data)))
|
|
@@ -44,11 +41,10 @@ class Sqrt(PandasOperand, VectorizableMixin):
|
|
|
44
41
|
return self._round_value(np.sqrt(data.abs()))
|
|
45
42
|
|
|
46
43
|
|
|
47
|
-
class Square(PandasOperand
|
|
44
|
+
class Square(PandasOperand):
|
|
48
45
|
name = "square"
|
|
49
46
|
is_unary = True
|
|
50
47
|
is_vectorizable = True
|
|
51
|
-
group_index = 0
|
|
52
48
|
|
|
53
49
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
54
50
|
return np.square(data)
|
|
@@ -57,12 +53,11 @@ class Square(PandasOperand, VectorizableMixin):
|
|
|
57
53
|
return np.square(data)
|
|
58
54
|
|
|
59
55
|
|
|
60
|
-
class Sigmoid(PandasOperand
|
|
56
|
+
class Sigmoid(PandasOperand):
|
|
61
57
|
name = "sigmoid"
|
|
62
58
|
is_unary = True
|
|
63
59
|
is_vectorizable = True
|
|
64
60
|
output_type = "float"
|
|
65
|
-
group_index = 0
|
|
66
61
|
|
|
67
62
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
68
63
|
return self._round_value(1 / (1 + np.exp(-data)))
|
|
@@ -71,13 +66,12 @@ class Sigmoid(PandasOperand, VectorizableMixin):
|
|
|
71
66
|
return self._round_value(1 / (1 + np.exp(-data)))
|
|
72
67
|
|
|
73
68
|
|
|
74
|
-
class Floor(PandasOperand
|
|
69
|
+
class Floor(PandasOperand):
|
|
75
70
|
name = "floor"
|
|
76
71
|
is_unary = True
|
|
77
72
|
is_vectorizable = True
|
|
78
73
|
output_type = "int"
|
|
79
74
|
input_type = "continuous"
|
|
80
|
-
group_index = 0
|
|
81
75
|
|
|
82
76
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
83
77
|
return np.floor(data)
|
|
@@ -86,12 +80,11 @@ class Floor(PandasOperand, VectorizableMixin):
|
|
|
86
80
|
return np.floor(data)
|
|
87
81
|
|
|
88
82
|
|
|
89
|
-
class Residual(PandasOperand
|
|
83
|
+
class Residual(PandasOperand):
|
|
90
84
|
name = "residual"
|
|
91
85
|
is_unary = True
|
|
92
86
|
is_vectorizable = True
|
|
93
87
|
input_type = "continuous"
|
|
94
|
-
group_index = 0
|
|
95
88
|
|
|
96
89
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
97
90
|
return data - np.floor(data)
|
|
@@ -1,22 +1,20 @@
|
|
|
1
1
|
from typing import List
|
|
2
2
|
import pandas as pd
|
|
3
|
-
from upgini.autofe.operand import PandasOperand
|
|
3
|
+
from upgini.autofe.operand import PandasOperand
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class Mean(PandasOperand
|
|
6
|
+
class Mean(PandasOperand):
|
|
7
7
|
name = "mean"
|
|
8
8
|
output_type = "float"
|
|
9
9
|
is_vector = True
|
|
10
|
-
group_index = 0
|
|
11
10
|
|
|
12
11
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
13
12
|
return pd.DataFrame(data).T.fillna(0).mean(axis=1)
|
|
14
13
|
|
|
15
14
|
|
|
16
|
-
class Sum(PandasOperand
|
|
15
|
+
class Sum(PandasOperand):
|
|
17
16
|
name = "sum"
|
|
18
17
|
is_vector = True
|
|
19
|
-
group_index = 0
|
|
20
18
|
|
|
21
19
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
|
22
20
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
|
@@ -1256,6 +1256,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1256
1256
|
).get_cv_and_groups(X)
|
|
1257
1257
|
else:
|
|
1258
1258
|
from sklearn import __version__ as sklearn_version
|
|
1259
|
+
|
|
1259
1260
|
try:
|
|
1260
1261
|
from sklearn.model_selection._split import GroupsConsumerMixin
|
|
1261
1262
|
|
|
@@ -1684,6 +1685,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1684
1685
|
df = validated_X.copy()
|
|
1685
1686
|
|
|
1686
1687
|
df[TARGET] = validated_y
|
|
1688
|
+
|
|
1689
|
+
df = clean_full_duplicates(df, logger=self.logger, silent=True, bundle=self.bundle)
|
|
1690
|
+
|
|
1687
1691
|
num_samples = _num_samples(df)
|
|
1688
1692
|
if num_samples > Dataset.FIT_SAMPLE_THRESHOLD:
|
|
1689
1693
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_ROWS}")
|
|
@@ -1918,6 +1922,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1918
1922
|
|
|
1919
1923
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
1920
1924
|
non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
|
|
1925
|
+
# Don't pass
|
|
1921
1926
|
if email_converted_to_hem:
|
|
1922
1927
|
non_keys_columns.append(email_column)
|
|
1923
1928
|
|
|
@@ -1939,6 +1944,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1939
1944
|
if add_fit_system_record_id:
|
|
1940
1945
|
df = self.__add_fit_system_record_id(df, dict(), search_keys)
|
|
1941
1946
|
df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
|
|
1947
|
+
non_keys_columns.append(SORT_ID)
|
|
1942
1948
|
|
|
1943
1949
|
columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
|
|
1944
1950
|
|
|
@@ -2877,26 +2883,35 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2877
2883
|
|
|
2878
2884
|
# order by date and idempotent order by other keys
|
|
2879
2885
|
if self.cv not in [CVType.time_series, CVType.blocked_time_series]:
|
|
2886
|
+
sort_exclude_columns = [original_order_name, ORIGINAL_INDEX, EVAL_SET_INDEX, TARGET, "__target"]
|
|
2880
2887
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
2881
2888
|
date_column = DateTimeSearchKeyConverter.DATETIME_COL
|
|
2889
|
+
sort_exclude_columns.append(self._get_date_column(search_keys))
|
|
2882
2890
|
else:
|
|
2883
2891
|
date_column = self._get_date_column(search_keys)
|
|
2884
2892
|
sort_columns = [date_column] if date_column is not None else []
|
|
2885
2893
|
|
|
2886
|
-
|
|
2894
|
+
other_columns = sorted(
|
|
2887
2895
|
[
|
|
2888
|
-
|
|
2889
|
-
for
|
|
2890
|
-
if
|
|
2891
|
-
and
|
|
2892
|
-
and df[
|
|
2896
|
+
c
|
|
2897
|
+
for c in df.columns
|
|
2898
|
+
if c not in sort_columns
|
|
2899
|
+
and c not in sort_exclude_columns
|
|
2900
|
+
and df[c].nunique() > 1
|
|
2893
2901
|
]
|
|
2902
|
+
# [
|
|
2903
|
+
# sk
|
|
2904
|
+
# for sk, key_type in search_keys.items()
|
|
2905
|
+
# if key_type not in [SearchKey.DATE, SearchKey.DATETIME]
|
|
2906
|
+
# and sk in df.columns
|
|
2907
|
+
# and df[sk].nunique() > 1 # don't use constant keys for hash
|
|
2908
|
+
# ]
|
|
2894
2909
|
)
|
|
2895
2910
|
|
|
2896
2911
|
search_keys_hash = "search_keys_hash"
|
|
2897
|
-
if len(
|
|
2912
|
+
if len(other_columns) > 0:
|
|
2898
2913
|
sort_columns.append(search_keys_hash)
|
|
2899
|
-
df[search_keys_hash] = pd.util.hash_pandas_object(df[
|
|
2914
|
+
df[search_keys_hash] = pd.util.hash_pandas_object(df[other_columns], index=False)
|
|
2900
2915
|
|
|
2901
2916
|
df = df.sort_values(by=sort_columns)
|
|
2902
2917
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
|
|
3
|
+
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
|
4
|
+
*
|
|
5
|
+
* This software contains code from open-source projects:
|
|
6
|
+
* MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
|
|
7
|
+
*/
|
|
8
|
+
var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return 
n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var 
t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return 
Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var 
e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 
10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return 
n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var 
n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="le
aderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly
94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: 
block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocm
VmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 
0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return 
navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var 
e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var 
e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return 
navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return 
e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
|
|
@@ -145,7 +145,8 @@ dataset_too_many_rows_registered=X rows limit for transform is {}. Please sample
|
|
|
145
145
|
dataset_empty_column_names=Some column names are empty. Add names please
|
|
146
146
|
dataset_full_duplicates=\nWARNING: {:.5f}% of the rows are fully duplicated
|
|
147
147
|
dataset_diff_target_duplicates=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates with different y values. These rows will be deleted as incorrect\nIncorrect row indexes: {}
|
|
148
|
-
|
|
148
|
+
dataset_train_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in X are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
|
|
149
|
+
dataset_eval_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in eval{} X are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
|
|
149
150
|
dataset_drop_old_dates=\nWARNING: We don't have data before '2000-01-01' and removed all earlier records from the search dataset
|
|
150
151
|
dataset_all_dates_old=There is empty train dataset after removing data before '2000-01-01'
|
|
151
152
|
dataset_invalid_target_type=Unexpected dtype of target for binary task type: {}. Expected int or bool
|
|
@@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Union
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
-
from upgini.metadata import SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
6
|
+
from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
|
|
7
7
|
from upgini.resource_bundle import ResourceBundle
|
|
8
8
|
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
9
9
|
from upgini.utils.target_utils import define_task
|
|
@@ -78,20 +78,58 @@ def remove_fintech_duplicates(
|
|
|
78
78
|
rows_with_diff_target = grouped_by_personal_cols.filter(has_diff_target_within_60_days)
|
|
79
79
|
if len(rows_with_diff_target) > 0:
|
|
80
80
|
unique_keys_to_delete = rows_with_diff_target[personal_cols].drop_duplicates()
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
logger
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
81
|
+
if EVAL_SET_INDEX not in df.columns:
|
|
82
|
+
rows_to_remove = pd.merge(df.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
83
|
+
rows_to_remove = rows_to_remove.set_index(df.index.name or "index")
|
|
84
|
+
perc = len(rows_to_remove) * 100 / len(df)
|
|
85
|
+
msg = bundle.get("dataset_train_diff_target_duplicates_fintech").format(
|
|
86
|
+
perc, len(rows_to_remove), rows_to_remove.index.to_list()
|
|
87
|
+
)
|
|
88
|
+
if not silent:
|
|
89
|
+
print(msg)
|
|
90
|
+
if logger:
|
|
91
|
+
logger.warning(msg)
|
|
92
|
+
logger.info(f"Dataset shape before clean fintech duplicates: {df.shape}")
|
|
93
|
+
df = df[~df.index.isin(rows_to_remove.index)]
|
|
94
|
+
logger.info(f"Dataset shape after clean fintech duplicates: {df.shape}")
|
|
95
|
+
else:
|
|
96
|
+
# Indices in train and eval_set can be the same so we remove rows from them separately
|
|
97
|
+
train = df.query(f"{EVAL_SET_INDEX} == 0")
|
|
98
|
+
train_rows_to_remove = pd.merge(train.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
99
|
+
train_rows_to_remove = train_rows_to_remove.set_index(train.index.name or "index")
|
|
100
|
+
train_perc = len(train_rows_to_remove) * 100 / len(train)
|
|
101
|
+
msg = bundle.get("dataset_train_diff_target_duplicates_fintech").format(
|
|
102
|
+
train_perc, len(train_rows_to_remove), train_rows_to_remove.index.to_list()
|
|
103
|
+
)
|
|
104
|
+
if not silent:
|
|
105
|
+
print(msg)
|
|
106
|
+
if logger:
|
|
107
|
+
logger.warning(msg)
|
|
108
|
+
logger.info(f"Train dataset shape before clean fintech duplicates: {train.shape}")
|
|
109
|
+
train = train[~train.index.isin(train_rows_to_remove.index)]
|
|
110
|
+
logger.info(f"Train dataset shape after clean fintech duplicates: {train.shape}")
|
|
111
|
+
|
|
112
|
+
evals = [df.query(f"{EVAL_SET_INDEX} == {i}") for i in df[EVAL_SET_INDEX].unique() if i != 0]
|
|
113
|
+
new_evals = []
|
|
114
|
+
for i, eval in enumerate(evals):
|
|
115
|
+
eval_rows_to_remove = pd.merge(eval.reset_index(), unique_keys_to_delete, on=personal_cols)
|
|
116
|
+
eval_rows_to_remove = eval_rows_to_remove.set_index(eval.index.name or "index")
|
|
117
|
+
eval_perc = len(eval_rows_to_remove) * 100 / len(eval)
|
|
118
|
+
msg = bundle.get("dataset_eval_diff_target_duplicates_fintech").format(
|
|
119
|
+
eval_perc, len(eval_rows_to_remove), i + 1, eval_rows_to_remove.index.to_list()
|
|
120
|
+
)
|
|
121
|
+
if not silent:
|
|
122
|
+
print(msg)
|
|
123
|
+
if logger:
|
|
124
|
+
logger.warning(msg)
|
|
125
|
+
logger.info(f"Eval {i + 1} dataset shape before clean fintech duplicates: {eval.shape}")
|
|
126
|
+
eval = eval[~eval.index.isin(eval_rows_to_remove.index)]
|
|
127
|
+
logger.info(f"Eval {i + 1} dataset shape after clean fintech duplicates: {eval.shape}")
|
|
128
|
+
new_evals.append(eval)
|
|
129
|
+
|
|
130
|
+
logger.info(f"Dataset shape before clean fintech duplicates: {df.shape}")
|
|
131
|
+
df = pd.concat([train] + new_evals)
|
|
132
|
+
logger.info(f"Dataset shape after clean fintech duplicates: {df.shape}")
|
|
95
133
|
return df
|
|
96
134
|
|
|
97
135
|
|
|
@@ -101,14 +139,18 @@ def clean_full_duplicates(
|
|
|
101
139
|
nrows = len(df)
|
|
102
140
|
if nrows == 0:
|
|
103
141
|
return df
|
|
104
|
-
# Remove
|
|
142
|
+
# Remove full duplicates (exclude system_record_id, sort_id and eval_set_index)
|
|
105
143
|
unique_columns = df.columns.tolist()
|
|
106
144
|
if SYSTEM_RECORD_ID in unique_columns:
|
|
107
145
|
unique_columns.remove(SYSTEM_RECORD_ID)
|
|
108
146
|
if SORT_ID in unique_columns:
|
|
109
147
|
unique_columns.remove(SORT_ID)
|
|
148
|
+
if EVAL_SET_INDEX in unique_columns:
|
|
149
|
+
unique_columns.remove(EVAL_SET_INDEX)
|
|
110
150
|
logger.info(f"Dataset shape before clean duplicates: {df.shape}")
|
|
111
|
-
|
|
151
|
+
# Train segment goes first so if duplicates are found in train and eval set
|
|
152
|
+
# then we keep unique rows in train segment
|
|
153
|
+
df = df.drop_duplicates(subset=unique_columns, keep="first")
|
|
112
154
|
logger.info(f"Dataset shape after clean duplicates: {df.shape}")
|
|
113
155
|
nrows_after_full_dedup = len(df)
|
|
114
156
|
share_full_dedup = 100 * (1 - nrows_after_full_dedup / nrows)
|
|
@@ -123,7 +165,7 @@ def clean_full_duplicates(
|
|
|
123
165
|
marked_duplicates = df.duplicated(subset=unique_columns, keep=False)
|
|
124
166
|
if marked_duplicates.sum() > 0:
|
|
125
167
|
dups_indices = df[marked_duplicates].index.to_list()
|
|
126
|
-
nrows_after_tgt_dedup = len(df.drop_duplicates(subset=unique_columns))
|
|
168
|
+
nrows_after_tgt_dedup = len(df.drop_duplicates(subset=unique_columns, keep=False))
|
|
127
169
|
num_dup_rows = nrows_after_full_dedup - nrows_after_tgt_dedup
|
|
128
170
|
share_tgt_dedup = 100 * num_dup_rows / nrows_after_full_dedup
|
|
129
171
|
|
|
@@ -133,6 +175,7 @@ def clean_full_duplicates(
|
|
|
133
175
|
print(msg)
|
|
134
176
|
df = df.drop_duplicates(subset=unique_columns, keep=False)
|
|
135
177
|
logger.info(f"Dataset shape after clean invalid target duplicates: {df.shape}")
|
|
178
|
+
|
|
136
179
|
return df
|
|
137
180
|
|
|
138
181
|
|
|
@@ -7,6 +7,7 @@ src/upgini/ads.py
|
|
|
7
7
|
src/upgini/dataset.py
|
|
8
8
|
src/upgini/errors.py
|
|
9
9
|
src/upgini/features_enricher.py
|
|
10
|
+
src/upgini/fingerprint.js
|
|
10
11
|
src/upgini/http.py
|
|
11
12
|
src/upgini/metadata.py
|
|
12
13
|
src/upgini/metrics.py
|
|
@@ -23,7 +24,6 @@ src/upgini/ads_management/ads_manager.py
|
|
|
23
24
|
src/upgini/autofe/__init__.py
|
|
24
25
|
src/upgini/autofe/all_operands.py
|
|
25
26
|
src/upgini/autofe/binary.py
|
|
26
|
-
src/upgini/autofe/date.py
|
|
27
27
|
src/upgini/autofe/feature.py
|
|
28
28
|
src/upgini/autofe/groupby.py
|
|
29
29
|
src/upgini/autofe/operand.py
|
|
@@ -64,7 +64,6 @@ src/upgini/utils/sklearn_ext.py
|
|
|
64
64
|
src/upgini/utils/target_utils.py
|
|
65
65
|
src/upgini/utils/track_info.py
|
|
66
66
|
src/upgini/utils/warning_counter.py
|
|
67
|
-
tests/test_autofe_operands.py
|
|
68
67
|
tests/test_binary_dataset.py
|
|
69
68
|
tests/test_blocked_time_series.py
|
|
70
69
|
tests/test_categorical_dataset.py
|
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
from typing import Optional, Union
|
|
2
|
-
import numpy as np
|
|
3
|
-
import pandas as pd
|
|
4
|
-
|
|
5
|
-
from upgini.autofe.operand import PandasOperand
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class DateDiffMixin:
|
|
9
|
-
diff_unit: str = "D"
|
|
10
|
-
left_unit: Optional[str] = None
|
|
11
|
-
right_unit: Optional[str] = None
|
|
12
|
-
|
|
13
|
-
def _convert_to_date(
|
|
14
|
-
self, x: Union[pd.DataFrame, pd.Series], unit: Optional[str]
|
|
15
|
-
) -> Union[pd.DataFrame, pd.Series]:
|
|
16
|
-
if isinstance(x, pd.DataFrame):
|
|
17
|
-
return x.apply(lambda y: self._convert_to_date(y, unit), axis=1)
|
|
18
|
-
|
|
19
|
-
return pd.to_datetime(x, unit=unit)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class DateDiff(PandasOperand, DateDiffMixin):
|
|
23
|
-
name = "date_diff"
|
|
24
|
-
is_binary = True
|
|
25
|
-
has_symmetry_importance = True
|
|
26
|
-
|
|
27
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
28
|
-
left = self._convert_to_date(left, self.left_unit)
|
|
29
|
-
right = self._convert_to_date(right, self.right_unit)
|
|
30
|
-
return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
|
|
31
|
-
|
|
32
|
-
def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
|
|
33
|
-
x[x < 0] = None
|
|
34
|
-
return x
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class DateDiffType2(PandasOperand, DateDiffMixin):
|
|
38
|
-
name = "date_diff_type2"
|
|
39
|
-
is_binary = True
|
|
40
|
-
has_symmetry_importance = True
|
|
41
|
-
is_vectorizable = False
|
|
42
|
-
|
|
43
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
44
|
-
left = self._convert_to_date(left, self.left_unit)
|
|
45
|
-
right = self._convert_to_date(right, self.right_unit)
|
|
46
|
-
future = right + (left.dt.year - right.dt.year).apply(
|
|
47
|
-
lambda y: np.datetime64("NaT") if np.isnan(y) else pd.tseries.offsets.DateOffset(years=y)
|
|
48
|
-
)
|
|
49
|
-
before = future[future < left]
|
|
50
|
-
future[future < left] = before + pd.tseries.offsets.DateOffset(years=1)
|
|
51
|
-
diff = (future - left) / np.timedelta64(1, self.diff_unit)
|
|
52
|
-
|
|
53
|
-
return diff
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
from upgini.autofe.date import DateDiff, DateDiffType2
|
|
3
|
-
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
from pandas.testing import assert_series_equal
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def test_date_diff():
|
|
9
|
-
df = pd.DataFrame(
|
|
10
|
-
[[datetime(1993, 12, 10), datetime(2022, 10, 10)], [datetime(2023, 10, 10), datetime(2022, 10, 10)]],
|
|
11
|
-
columns=["date1", "date2"],
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
operand = DateDiff()
|
|
15
|
-
expected_result = pd.Series([10531, None])
|
|
16
|
-
assert_series_equal(operand.calculate_binary(df.date2, df.date1), expected_result)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def test_date_diff_future():
|
|
20
|
-
df = pd.DataFrame(
|
|
21
|
-
[[datetime(1993, 12, 10), datetime(2022, 10, 10)], [datetime(1993, 4, 10), datetime(2022, 10, 10)]],
|
|
22
|
-
columns=["date1", "date2"],
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
operand = DateDiffType2()
|
|
26
|
-
expected_result = pd.Series([61.0, 182.0])
|
|
27
|
-
assert_series_equal(operand.calculate_binary(df.date2, df.date1), expected_result)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|