upgini 1.1.274a3388.post2__py3-none-any.whl → 1.1.275a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/autofe/date.py CHANGED
@@ -1,9 +1,9 @@
1
- from typing import Any, List, Optional, Union
1
+ from typing import Any, Optional, Union
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  from pydantic import BaseModel
5
5
 
6
- from upgini.autofe.operand import MultiOperand, Operand, PandasOperand
6
+ from upgini.autofe.operand import PandasOperand
7
7
 
8
8
 
9
9
  class DateDiffMixin(BaseModel):
@@ -24,7 +24,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
24
24
  name = "date_diff"
25
25
  is_binary = True
26
26
  has_symmetry_importance = True
27
- common_type = "date_diff"
28
27
 
29
28
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
30
29
  left = self._convert_to_date(left, self.left_unit)
@@ -40,7 +39,6 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
40
39
  name = "date_diff_type2"
41
40
  is_binary = True
42
41
  has_symmetry_importance = True
43
- common_type = "date_diff"
44
42
 
45
43
  def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
46
44
  left = self._convert_to_date(left, self.left_unit)
@@ -62,7 +60,6 @@ class DateListDiff(PandasOperand, DateDiffMixin):
62
60
  is_binary = True
63
61
  has_symmetry_importance = True
64
62
  aggregation: str
65
- common_type = "date_diff_list"
66
63
 
67
64
  def __init__(self, **data: Any) -> None:
68
65
  if "name" not in data:
@@ -89,9 +86,6 @@ class DateListDiff(PandasOperand, DateDiffMixin):
89
86
 
90
87
  return method(x) if len(x) > 0 else default
91
88
 
92
- def make_multi_operand(self, operands: List[Operand]) -> Optional[MultiOperand]:
93
- return DateListDiffMulti(children=operands, aggregation="")
94
-
95
89
 
96
90
  class DateListDiffBounded(DateListDiff):
97
91
  lower_bound: Optional[int]
@@ -114,12 +108,3 @@ class DateListDiffBounded(DateListDiff):
114
108
  def _agg(self, x):
115
109
  x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
116
110
  return super()._agg(x)
117
-
118
-
119
- class DateListDiffMulti(DateListDiff, MultiOperand):
120
- def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
121
- left = self._convert_to_date(left, self.left_unit)
122
- right = right.apply(lambda x: pd.arrays.DatetimeArray(self._convert_to_date(x, self.right_unit)))
123
-
124
- diff = pd.Series(left - right.values).apply(self._diff)
125
- return diff.apply(lambda x: [c._agg(x) for c in self.children])
upgini/autofe/feature.py CHANGED
@@ -1,15 +1,13 @@
1
1
  import hashlib
2
2
  import itertools
3
- import operator
4
3
  from typing import Dict, List, Optional, Tuple, Union
5
4
 
6
- from more_itertools import map_reduce
7
5
  import numpy as np
8
6
  import pandas as pd
9
7
  from pandas._typing import DtypeObj
10
8
 
11
9
  from upgini.autofe.all_operands import find_op
12
- from upgini.autofe.operand import MultiOperand, Operand, PandasOperand
10
+ from upgini.autofe.operand import Operand, PandasOperand
13
11
 
14
12
 
15
13
  class Column:
@@ -31,9 +29,6 @@ class Column:
31
29
  else:
32
30
  return feature_name[2:last_component_idx]
33
31
 
34
- def get_display_name(self, **kwargs):
35
- return self.name
36
-
37
32
  def delete_data(self):
38
33
  self.data = None
39
34
 
@@ -162,8 +157,6 @@ class Feature:
162
157
  else:
163
158
  new_data = new_data.replace([-np.inf, np.inf], np.nan)
164
159
 
165
- new_data = new_data.rename(self.get_display_name())
166
-
167
160
  if is_root:
168
161
  self.data = new_data
169
162
  return new_data
@@ -333,65 +326,3 @@ class FeatureGroup:
333
326
  self.main_column_node.delete_data()
334
327
  for child in self.children:
335
328
  child.delete_data()
336
-
337
-
338
- class OperandGroup:
339
- def __init__(self, operand: MultiOperand, children: List[Union[Column, Feature]]):
340
- self.op = operand
341
- self.children = children
342
- self.data: Optional[pd.DataFrame] = None
343
-
344
- def get_columns(self, **kwargs) -> List[str]:
345
- column_list = []
346
- seen = set()
347
- for child in self.children:
348
- columns = child.get_columns(**kwargs)
349
- column_list.extend([f for f in columns if f not in seen])
350
- seen.update(columns)
351
- return column_list
352
-
353
- def get_display_names(self, **kwargs) -> List[str]:
354
- names = [Feature(op, self.children).get_display_name(**kwargs) for op in self.op.children]
355
- return names
356
-
357
- def calculate(self, data: pd.DataFrame, is_root=False) -> pd.DataFrame:
358
- if isinstance(self.op, PandasOperand):
359
- if self.op.is_vector:
360
- ds = [child.calculate(data) for child in self.children]
361
- new_data = self.op.calculate(data=ds)
362
- else:
363
- d1 = self.children[0].calculate(data)
364
- d2 = None if len(self.children) < 2 else self.children[1].calculate(data)
365
- new_data = self.op.calculate(data=d1, left=d1, right=d2)
366
-
367
- new_data = pd.DataFrame(new_data.values.tolist())
368
- new_data.columns = self.get_display_names()
369
- else:
370
- raise NotImplementedError(f"Unrecognized operator {self.op.name}.")
371
-
372
- if is_root:
373
- self.data = new_data
374
- return new_data
375
-
376
- @staticmethod
377
- def make_groups(candidates: List[Feature]) -> List[Union[Feature, "FeatureGroup"]]:
378
- grouped_features = []
379
-
380
- for _, features in sorted(
381
- map_reduce(
382
- candidates, lambda f: (f.op.common_type or "", ",".join([c.get_display_name() for c in f.children]))
383
- ).items(),
384
- key=operator.itemgetter(0),
385
- ):
386
- feature_list = list(features)
387
- multi_op = feature_list[0].op.make_multi_operand([f.op for f in feature_list])
388
- if multi_op is not None:
389
- grouped_features.append(OperandGroup(multi_op, feature_list[0].children))
390
- else:
391
- grouped_features.extend(feature_list)
392
- return grouped_features
393
-
394
- def delete_data(self):
395
- self.data = None
396
- for child in self.children:
397
- child.delete_data()
upgini/autofe/operand.py CHANGED
@@ -5,9 +5,6 @@ import pandas as pd
5
5
  import numpy as np
6
6
 
7
7
 
8
- MAIN_COLUMN = "main_column"
9
-
10
-
11
8
  class Operand(BaseModel):
12
9
  name: str
13
10
  alias: Optional[str]
@@ -21,7 +18,6 @@ class Operand(BaseModel):
21
18
  is_binary: bool = False
22
19
  is_vector: bool = False
23
20
  is_distribution_dependent: bool = False
24
- common_type: Optional[str] = None
25
21
  params: Optional[Dict[str, str]]
26
22
 
27
23
  def set_params(self, params: Dict[str, str]):
@@ -31,8 +27,8 @@ class Operand(BaseModel):
31
27
  def get_params(self) -> Dict[str, str]:
32
28
  return self.params
33
29
 
34
- def make_multi_operand(self, operands: List["Operand"]) -> Optional["MultiOperand"]:
35
- return None
30
+
31
+ MAIN_COLUMN = "main_column"
36
32
 
37
33
 
38
34
  class PandasOperand(Operand, abc.ABC):
@@ -86,7 +82,3 @@ class VectorizableMixin(Operand):
86
82
  value_columns = [col for col in input_columns if col != group_column]
87
83
 
88
84
  return group_column, value_columns
89
-
90
-
91
- class MultiOperand(Operand):
92
- children: List[Operand]
upgini/dataset.py CHANGED
@@ -22,7 +22,9 @@ from pandas.api.types import (
22
22
  from upgini.errors import ValidationError
23
23
  from upgini.http import ProgressStage, SearchProgress, _RestClient
24
24
  from upgini.metadata import (
25
+ ENTITY_SYSTEM_RECORD_ID,
25
26
  EVAL_SET_INDEX,
27
+ SEARCH_KEY_UNNEST,
26
28
  SYSTEM_COLUMNS,
27
29
  SYSTEM_RECORD_ID,
28
30
  TARGET,
@@ -78,6 +80,7 @@ class Dataset: # (pd.DataFrame):
78
80
  path: Optional[str] = None,
79
81
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
80
82
  search_keys: Optional[List[Tuple[str, ...]]] = None,
83
+ unnest_search_keys: Optional[List[str]] = None,
81
84
  model_task_type: Optional[ModelTaskType] = None,
82
85
  random_state: Optional[int] = None,
83
86
  rest_client: Optional[_RestClient] = None,
@@ -112,6 +115,7 @@ class Dataset: # (pd.DataFrame):
112
115
  self.description = description
113
116
  self.meaning_types = meaning_types
114
117
  self.search_keys = search_keys
118
+ self.unnest_search_keys = unnest_search_keys
115
119
  self.ignore_columns = []
116
120
  self.hierarchical_group_keys = []
117
121
  self.hierarchical_subgroup_keys = []
@@ -171,7 +175,7 @@ class Dataset: # (pd.DataFrame):
171
175
  new_columns = []
172
176
  dup_counter = 0
173
177
  for column in self.data.columns:
174
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
178
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
175
179
  self.columns_renaming[column] = column
176
180
  new_columns.append(column)
177
181
  continue
@@ -352,7 +356,9 @@ class Dataset: # (pd.DataFrame):
352
356
 
353
357
  if is_string_dtype(self.data[postal_code]):
354
358
  try:
355
- self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
359
+ self.data[postal_code] = (
360
+ self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
361
+ )
356
362
  except Exception:
357
363
  pass
358
364
  elif is_float_dtype(self.data[postal_code]):
@@ -802,6 +808,8 @@ class Dataset: # (pd.DataFrame):
802
808
  meaningType=meaning_type,
803
809
  minMaxValues=min_max_values,
804
810
  )
811
+ if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
812
+ column_meta.isUnnest = True
805
813
 
806
814
  columns.append(column_meta)
807
815
 
@@ -10,6 +10,7 @@ import sys
10
10
  import tempfile
11
11
  import time
12
12
  import uuid
13
+ from collections import Counter
13
14
  from dataclasses import dataclass
14
15
  from threading import Thread
15
16
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -43,9 +44,11 @@ from upgini.mdc import MDC
43
44
  from upgini.metadata import (
44
45
  COUNTRY,
45
46
  DEFAULT_INDEX,
47
+ ENTITY_SYSTEM_RECORD_ID,
46
48
  EVAL_SET_INDEX,
47
49
  ORIGINAL_INDEX,
48
50
  RENAMED_INDEX,
51
+ SEARCH_KEY_UNNEST,
49
52
  SORT_ID,
50
53
  SYSTEM_RECORD_ID,
51
54
  TARGET,
@@ -1181,6 +1184,8 @@ class FeaturesEnricher(TransformerMixin):
1181
1184
  search_keys = self.search_keys.copy()
1182
1185
  search_keys = self.__prepare_search_keys(x, search_keys, is_demo_dataset, is_transform=True, silent_mode=True)
1183
1186
 
1187
+ unnest_search_keys = []
1188
+
1184
1189
  extended_X = x.copy()
1185
1190
  generated_features = []
1186
1191
  date_column = self._get_date_column(search_keys)
@@ -1191,7 +1196,7 @@ class FeaturesEnricher(TransformerMixin):
1191
1196
  email_column = self._get_email_column(search_keys)
1192
1197
  hem_column = self._get_hem_column(search_keys)
1193
1198
  if email_column:
1194
- converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1199
+ converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, unnest_search_keys, self.logger)
1195
1200
  extended_X = converter.convert(extended_X)
1196
1201
  generated_features.extend(converter.generated_features)
1197
1202
  if (
@@ -1902,11 +1907,38 @@ class FeaturesEnricher(TransformerMixin):
1902
1907
  generated_features.extend(converter.generated_features)
1903
1908
  else:
1904
1909
  self.logger.info("Input dataset hasn't date column")
1910
+
1911
+ # Don't pass all features in backend on transform
1912
+ original_features_for_transform = []
1913
+ runtime_parameters = self._get_copy_of_runtime_parameters()
1914
+ features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
1915
+ if len(features_not_to_pass) > 0:
1916
+ # Pass only features that need for transform
1917
+ features_for_transform = self._search_task.get_features_for_transform()
1918
+ if features_for_transform is not None and len(features_for_transform) > 0:
1919
+ file_metadata = self._search_task.get_file_metadata(trace_id)
1920
+ original_features_for_transform = [
1921
+ c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1922
+ ]
1923
+
1924
+ runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1925
+
1926
+ columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1927
+
1928
+ df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
1929
+ df[columns_for_system_record_id], index=False
1930
+ ).astype("Float64")
1931
+
1932
+ # Explode multiple search keys
1933
+ df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
1934
+
1905
1935
  email_column = self._get_email_column(search_keys)
1906
1936
  hem_column = self._get_hem_column(search_keys)
1907
1937
  email_converted_to_hem = False
1908
1938
  if email_column:
1909
- converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1939
+ converter = EmailSearchKeyConverter(
1940
+ email_column, hem_column, search_keys, unnest_search_keys, self.logger
1941
+ )
1910
1942
  df = converter.convert(df)
1911
1943
  generated_features.extend(converter.generated_features)
1912
1944
  email_converted_to_hem = converter.email_converted_to_hem
@@ -1920,30 +1952,21 @@ class FeaturesEnricher(TransformerMixin):
1920
1952
  generated_features = [f for f in generated_features if f in self.fit_generated_features]
1921
1953
 
1922
1954
  meaning_types = {col: key.value for col, key in search_keys.items()}
1923
- non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1955
+ # non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1956
+ for col in original_features_for_transform:
1957
+ meaning_types[col] = FileColumnMeaningType.FEATURE
1958
+ features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
1924
1959
 
1925
1960
  if email_converted_to_hem:
1926
- non_keys_columns.append(email_column)
1961
+ features_not_to_pass.append(email_column)
1927
1962
 
1928
- # Don't pass features in backend on transform
1929
- original_features_for_transform = None
1930
- runtime_parameters = self._get_copy_of_runtime_parameters()
1931
- if len(non_keys_columns) > 0:
1932
- # Pass only features that need for transform
1933
- features_for_transform = self._search_task.get_features_for_transform()
1934
- if features_for_transform is not None and len(features_for_transform) > 0:
1935
- file_metadata = self._search_task.get_file_metadata(trace_id)
1936
- original_features_for_transform = [
1937
- c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1938
- ]
1939
- non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
1940
-
1941
- runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1963
+ features_not_to_pass = [c for c in features_not_to_pass if c not in original_features_for_transform]
1964
+ columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1942
1965
 
1943
1966
  if add_fit_system_record_id:
1944
1967
  df = self.__add_fit_system_record_id(df, dict(), search_keys)
1945
1968
  df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
1946
- non_keys_columns.append(SORT_ID)
1969
+ features_not_to_pass.append(SORT_ID)
1947
1970
 
1948
1971
  columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
1949
1972
 
@@ -1951,16 +1974,19 @@ class FeaturesEnricher(TransformerMixin):
1951
1974
  "Float64"
1952
1975
  )
1953
1976
  meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
1977
+ meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
1978
+ if SEARCH_KEY_UNNEST in df.columns:
1979
+ meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
1954
1980
 
1955
1981
  df = df.reset_index(drop=True)
1956
- system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
1982
+ system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
1957
1983
  if add_fit_system_record_id:
1958
1984
  system_columns_with_original_index.append(SORT_ID)
1959
1985
  df_with_original_index = df[system_columns_with_original_index].copy()
1960
1986
 
1961
1987
  combined_search_keys = combine_search_keys(search_keys.keys())
1962
1988
 
1963
- df_without_features = df.drop(columns=non_keys_columns)
1989
+ df_without_features = df.drop(columns=features_not_to_pass)
1964
1990
 
1965
1991
  df_without_features = clean_full_duplicates(
1966
1992
  df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
@@ -2116,6 +2142,14 @@ class FeaturesEnricher(TransformerMixin):
2116
2142
 
2117
2143
  key_types = search_keys.values()
2118
2144
 
2145
+ # Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
2146
+ multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
2147
+ for multi_key in multi_keys:
2148
+ if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
2149
+ msg = self.bundle.get("unsupported_multi_key").format(multi_key)
2150
+ self.logger.warning(msg)
2151
+ raise ValidationError(msg)
2152
+
2119
2153
  if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
2120
2154
  msg = self.bundle.get("date_and_datetime_simultanious")
2121
2155
  self.logger.warning(msg)
@@ -2131,11 +2165,11 @@ class FeaturesEnricher(TransformerMixin):
2131
2165
  self.logger.warning(msg)
2132
2166
  raise ValidationError(msg)
2133
2167
 
2134
- for key_type in SearchKey.__members__.values():
2135
- if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2136
- msg = self.bundle.get("multiple_search_key").format(key_type)
2137
- self.logger.warning(msg)
2138
- raise ValidationError(msg)
2168
+ # for key_type in SearchKey.__members__.values():
2169
+ # if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2170
+ # msg = self.bundle.get("multiple_search_key").format(key_type)
2171
+ # self.logger.warning(msg)
2172
+ # raise ValidationError(msg)
2139
2173
 
2140
2174
  # non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
2141
2175
  # if (
@@ -2220,9 +2254,7 @@ class FeaturesEnricher(TransformerMixin):
2220
2254
  self.fit_search_keys = self.search_keys.copy()
2221
2255
  self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
2222
2256
 
2223
- validate_dates_distribution(
2224
- validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter
2225
- )
2257
+ validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
2226
2258
 
2227
2259
  maybe_date_column = self._get_date_column(self.fit_search_keys)
2228
2260
  has_date = maybe_date_column is not None
@@ -2273,14 +2305,7 @@ class FeaturesEnricher(TransformerMixin):
2273
2305
  self.fit_generated_features.extend(converter.generated_features)
2274
2306
  else:
2275
2307
  self.logger.info("Input dataset hasn't date column")
2276
- email_column = self._get_email_column(self.fit_search_keys)
2277
- hem_column = self._get_hem_column(self.fit_search_keys)
2278
- email_converted_to_hem = False
2279
- if email_column:
2280
- converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
2281
- df = converter.convert(df)
2282
- self.fit_generated_features.extend(converter.generated_features)
2283
- email_converted_to_hem = converter.email_converted_to_hem
2308
+
2284
2309
  if (
2285
2310
  self.detect_missing_search_keys
2286
2311
  and list(self.fit_search_keys.values()) == [SearchKey.DATE]
@@ -2289,7 +2314,37 @@ class FeaturesEnricher(TransformerMixin):
2289
2314
  converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
2290
2315
  df = converter.convert(df)
2291
2316
 
2317
+ # Explode multiple search keys
2292
2318
  non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
2319
+ meaning_types = {
2320
+ **{col: key.value for col, key in self.fit_search_keys.items()},
2321
+ **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2322
+ }
2323
+ meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2324
+ if eval_set is not None and len(eval_set) > 0:
2325
+ meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2326
+ df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
2327
+
2328
+ # TODO check that this is correct for enrichment
2329
+ self.df_with_original_index = df.copy()
2330
+
2331
+ df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
2332
+
2333
+ # Convert EMAIL to HEM after unnesting to do it only with one column
2334
+ email_column = self._get_email_column(self.fit_search_keys)
2335
+ hem_column = self._get_hem_column(self.fit_search_keys)
2336
+ email_converted_to_hem = False
2337
+ if email_column:
2338
+ converter = EmailSearchKeyConverter(
2339
+ email_column, hem_column, self.fit_search_keys, unnest_search_keys, self.logger
2340
+ )
2341
+ df = converter.convert(df)
2342
+ self.fit_generated_features.extend(converter.generated_features)
2343
+ email_converted_to_hem = converter.email_converted_to_hem
2344
+
2345
+ non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
2346
+ self.fit_search_keys.keys()
2347
+ )
2293
2348
  if email_converted_to_hem:
2294
2349
  non_feature_columns.append(email_column)
2295
2350
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
@@ -2313,12 +2368,14 @@ class FeaturesEnricher(TransformerMixin):
2313
2368
  **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2314
2369
  }
2315
2370
  meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2371
+ meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
2372
+ if SEARCH_KEY_UNNEST in df.columns:
2373
+ meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
2316
2374
  if eval_set is not None and len(eval_set) > 0:
2317
2375
  meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2318
2376
 
2319
- df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
2377
+ df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, SYSTEM_RECORD_ID)
2320
2378
 
2321
- self.df_with_original_index = df.copy()
2322
2379
  df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
2323
2380
 
2324
2381
  combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
@@ -2326,14 +2383,15 @@ class FeaturesEnricher(TransformerMixin):
2326
2383
  dataset = Dataset(
2327
2384
  "tds_" + str(uuid.uuid4()),
2328
2385
  df=df,
2386
+ meaning_types=meaning_types,
2387
+ search_keys=combined_search_keys,
2388
+ unnest_search_keys=unnest_search_keys,
2329
2389
  model_task_type=model_task_type,
2330
2390
  date_format=self.date_format,
2331
2391
  random_state=self.random_state,
2332
2392
  rest_client=self.rest_client,
2333
2393
  logger=self.logger,
2334
2394
  )
2335
- dataset.meaning_types = meaning_types
2336
- dataset.search_keys = combined_search_keys
2337
2395
  if email_converted_to_hem:
2338
2396
  dataset.ignore_columns = [email_column]
2339
2397
 
@@ -2863,15 +2921,19 @@ class FeaturesEnricher(TransformerMixin):
2863
2921
 
2864
2922
  @staticmethod
2865
2923
  def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2866
- for col, t in search_keys.items():
2867
- if t == SearchKey.EMAIL:
2868
- return col
2924
+ cols = [col for col, t in search_keys.items() if t == SearchKey.EMAIL]
2925
+ if len(cols) > 1:
2926
+ raise Exception("More than one email column found after unnest")
2927
+ if len(cols) == 1:
2928
+ return cols[0]
2869
2929
 
2870
2930
  @staticmethod
2871
2931
  def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2872
- for col, t in search_keys.items():
2873
- if t == SearchKey.HEM:
2874
- return col
2932
+ cols = [col for col, t in search_keys.items() if t == SearchKey.HEM]
2933
+ if len(cols) > 1:
2934
+ raise Exception("More than one hem column found after unnest")
2935
+ if len(cols) == 1:
2936
+ return cols[0]
2875
2937
 
2876
2938
  @staticmethod
2877
2939
  def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
@@ -2879,8 +2941,42 @@ class FeaturesEnricher(TransformerMixin):
2879
2941
  if t == SearchKey.PHONE:
2880
2942
  return col
2881
2943
 
2944
+ def _explode_multiple_search_keys(self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]) -> pd.DataFrame:
2945
+ # find groups of multiple search keys
2946
+ search_key_names_by_type: Dict[SearchKey, str] = dict()
2947
+ for key_name, key_type in search_keys.items():
2948
+ search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
2949
+ search_key_names_by_type = {
2950
+ key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
2951
+ }
2952
+ if len(search_key_names_by_type) == 0:
2953
+ return df, []
2954
+
2955
+ multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
2956
+ other_columns = [col for col in df.columns if col not in multiple_keys_columns]
2957
+ exploded_dfs = []
2958
+ unnest_search_keys = []
2959
+
2960
+ for key_type, key_names in search_key_names_by_type.items():
2961
+ new_search_key = f"upgini_{key_type.name.lower()}_unnest"
2962
+ exploded_df = pd.melt(
2963
+ df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
2964
+ )
2965
+ exploded_dfs.append(exploded_df)
2966
+ for old_key in key_names:
2967
+ del search_keys[old_key]
2968
+ search_keys[new_search_key] = key_type
2969
+ unnest_search_keys.append(new_search_key)
2970
+
2971
+ df = pd.concat(exploded_dfs, ignore_index=True)
2972
+ return df, unnest_search_keys
2973
+
2882
2974
  def __add_fit_system_record_id(
2883
- self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
2975
+ self,
2976
+ df: pd.DataFrame,
2977
+ meaning_types: Dict[str, FileColumnMeaningType],
2978
+ search_keys: Dict[str, SearchKey],
2979
+ id_name: str,
2884
2980
  ) -> pd.DataFrame:
2885
2981
  # save original order or rows
2886
2982
  original_index_name = df.index.name
@@ -2903,9 +2999,7 @@ class FeaturesEnricher(TransformerMixin):
2903
2999
  [
2904
3000
  c
2905
3001
  for c in df.columns
2906
- if c not in sort_columns
2907
- and c not in sort_exclude_columns
2908
- and df[c].nunique() > 1
3002
+ if c not in sort_columns and c not in sort_exclude_columns and df[c].nunique() > 1
2909
3003
  ]
2910
3004
  # [
2911
3005
  # sk
@@ -2931,14 +3025,18 @@ class FeaturesEnricher(TransformerMixin):
2931
3025
 
2932
3026
  df = df.reset_index(drop=True).reset_index()
2933
3027
  # system_record_id saves correct order for fit
2934
- df = df.rename(columns={DEFAULT_INDEX: SYSTEM_RECORD_ID})
3028
+ df = df.rename(columns={DEFAULT_INDEX: id_name})
2935
3029
 
2936
3030
  # return original order
2937
3031
  df = df.set_index(ORIGINAL_INDEX)
2938
3032
  df.index.name = original_index_name
2939
3033
  df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
2940
3034
 
2941
- meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
3035
+ meaning_types[id_name] = (
3036
+ FileColumnMeaningType.SYSTEM_RECORD_ID
3037
+ if id_name == SYSTEM_RECORD_ID
3038
+ else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
3039
+ )
2942
3040
  return df
2943
3041
 
2944
3042
  def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -2993,7 +3091,10 @@ class FeaturesEnricher(TransformerMixin):
2993
3091
  )
2994
3092
 
2995
3093
  comparing_columns = X.columns if is_transform else df_with_original_index.columns
2996
- dup_features = [c for c in comparing_columns if c in result_features.columns and c != SYSTEM_RECORD_ID]
3094
+ dup_features = [
3095
+ c for c in comparing_columns
3096
+ if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
3097
+ ]
2997
3098
  if len(dup_features) > 0:
2998
3099
  self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
2999
3100
  raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
@@ -3004,8 +3105,7 @@ class FeaturesEnricher(TransformerMixin):
3004
3105
  result_features = pd.merge(
3005
3106
  df_with_original_index,
3006
3107
  result_features,
3007
- left_on=SYSTEM_RECORD_ID,
3008
- right_on=SYSTEM_RECORD_ID,
3108
+ on=ENTITY_SYSTEM_RECORD_ID,
3009
3109
  how="left" if is_transform else "inner",
3010
3110
  )
3011
3111
  result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
@@ -3385,13 +3485,13 @@ class FeaturesEnricher(TransformerMixin):
3385
3485
  self.warning_counter.increment()
3386
3486
 
3387
3487
  if len(valid_search_keys) == 1:
3388
- for k, v in valid_search_keys.items():
3389
- # Show warning for country only if country is the only key
3390
- if x[k].nunique() == 1 and (v != SearchKey.COUNTRY or len(valid_search_keys) == 1):
3391
- msg = self.bundle.get("single_constant_search_key").format(v, x[k].values[0])
3392
- print(msg)
3393
- self.logger.warning(msg)
3394
- self.warning_counter.increment()
3488
+ key, value = list(valid_search_keys.items())[0]
3489
+ # Show warning for country only if country is the only key
3490
+ if x[key].nunique() == 1:
3491
+ msg = self.bundle.get("single_constant_search_key").format(value, x[key].values[0])
3492
+ print(msg)
3493
+ self.logger.warning(msg)
3494
+ self.warning_counter.increment()
3395
3495
 
3396
3496
  self.logger.info(f"Prepared search keys: {valid_search_keys}")
3397
3497
 
@@ -3501,61 +3601,68 @@ class FeaturesEnricher(TransformerMixin):
3501
3601
  def check_need_detect(search_key: SearchKey):
3502
3602
  return not is_transform or search_key in self.fit_search_keys.values()
3503
3603
 
3504
- if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3505
- maybe_key = PostalCodeSearchKeyDetector().get_search_key_column(sample)
3506
- if maybe_key is not None:
3507
- search_keys[maybe_key] = SearchKey.POSTAL_CODE
3508
- self.autodetected_search_keys[maybe_key] = SearchKey.POSTAL_CODE
3509
- self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_key}")
3604
+ # if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3605
+ if check_need_detect(SearchKey.POSTAL_CODE):
3606
+ maybe_keys = PostalCodeSearchKeyDetector().get_search_key_columns(sample, search_keys)
3607
+ if maybe_keys:
3608
+ new_keys = {key: SearchKey.POSTAL_CODE for key in maybe_keys}
3609
+ search_keys.update(new_keys)
3610
+ self.autodetected_search_keys.update(new_keys)
3611
+ self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
3510
3612
  if not silent_mode:
3511
- print(self.bundle.get("postal_code_detected").format(maybe_key))
3613
+ print(self.bundle.get("postal_code_detected").format(maybe_keys))
3512
3614
 
3513
3615
  if (
3514
3616
  SearchKey.COUNTRY not in search_keys.values()
3515
3617
  and self.country_code is None
3516
3618
  and check_need_detect(SearchKey.COUNTRY)
3517
3619
  ):
3518
- maybe_key = CountrySearchKeyDetector().get_search_key_column(sample)
3519
- if maybe_key is not None:
3520
- search_keys[maybe_key] = SearchKey.COUNTRY
3521
- self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
3620
+ maybe_key = CountrySearchKeyDetector().get_search_key_columns(sample, search_keys)
3621
+ if maybe_key:
3622
+ search_keys[maybe_key[0]] = SearchKey.COUNTRY
3623
+ self.autodetected_search_keys[maybe_key[0]] = SearchKey.COUNTRY
3522
3624
  self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
3523
3625
  if not silent_mode:
3524
3626
  print(self.bundle.get("country_detected").format(maybe_key))
3525
3627
 
3526
3628
  if (
3527
- SearchKey.EMAIL not in search_keys.values()
3528
- and SearchKey.HEM not in search_keys.values()
3629
+ # SearchKey.EMAIL not in search_keys.values()
3630
+ SearchKey.HEM not in search_keys.values()
3529
3631
  and check_need_detect(SearchKey.HEM)
3530
3632
  ):
3531
- maybe_key = EmailSearchKeyDetector().get_search_key_column(sample)
3532
- if maybe_key is not None and maybe_key not in search_keys.keys():
3633
+ maybe_keys = EmailSearchKeyDetector().get_search_key_columns(sample, search_keys)
3634
+ if maybe_keys:
3533
3635
  if self.__is_registered or is_demo_dataset:
3534
- search_keys[maybe_key] = SearchKey.EMAIL
3535
- self.autodetected_search_keys[maybe_key] = SearchKey.EMAIL
3536
- self.logger.info(f"Autodetected search key EMAIL in column {maybe_key}")
3636
+ new_keys = {key: SearchKey.EMAIL for key in maybe_keys}
3637
+ search_keys.update(new_keys)
3638
+ self.autodetected_search_keys.update(new_keys)
3639
+ self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
3537
3640
  if not silent_mode:
3538
- print(self.bundle.get("email_detected").format(maybe_key))
3641
+ print(self.bundle.get("email_detected").format(maybe_keys))
3539
3642
  else:
3540
3643
  self.logger.warning(
3541
- f"Autodetected search key EMAIL in column {maybe_key}. But not used because not registered user"
3644
+ f"Autodetected search key EMAIL in column {maybe_keys}."
3645
+ " But not used because not registered user"
3542
3646
  )
3543
3647
  if not silent_mode:
3544
- print(self.bundle.get("email_detected_not_registered").format(maybe_key))
3648
+ print(self.bundle.get("email_detected_not_registered").format(maybe_keys))
3545
3649
  self.warning_counter.increment()
3546
3650
 
3547
- if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3548
- maybe_key = PhoneSearchKeyDetector().get_search_key_column(sample)
3549
- if maybe_key is not None and maybe_key not in search_keys.keys():
3651
+ # if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3652
+ if check_need_detect(SearchKey.PHONE):
3653
+ maybe_keys = PhoneSearchKeyDetector().get_search_key_columns(sample, search_keys)
3654
+ if maybe_keys:
3550
3655
  if self.__is_registered or is_demo_dataset:
3551
- search_keys[maybe_key] = SearchKey.PHONE
3552
- self.autodetected_search_keys[maybe_key] = SearchKey.PHONE
3553
- self.logger.info(f"Autodetected search key PHONE in column {maybe_key}")
3656
+ new_keys = {key: SearchKey.PHONE for key in maybe_keys}
3657
+ search_keys.update(new_keys)
3658
+ self.autodetected_search_keys.update(new_keys)
3659
+ self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
3554
3660
  if not silent_mode:
3555
- print(self.bundle.get("phone_detected").format(maybe_key))
3661
+ print(self.bundle.get("phone_detected").format(maybe_keys))
3556
3662
  else:
3557
3663
  self.logger.warning(
3558
- f"Autodetected search key PHONE in column {maybe_key}. But not used because not registered user"
3664
+ f"Autodetected search key PHONE in column {maybe_keys}. "
3665
+ "But not used because not registered user"
3559
3666
  )
3560
3667
  if not silent_mode:
3561
3668
  print(self.bundle.get("phone_detected_not_registered"))
upgini/fingerprint.js ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
3
+ * Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
4
+ *
5
+ * This software contains code from open-source projects:
6
+ * MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
7
+ */
8
+ var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return 
n(!0,e)}),(function(e){return n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var 
t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return 
Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var 
e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 
10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return 
n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var 
n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="le
aderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly
94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: 
block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocm
VmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 
0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return 
navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var 
e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var 
e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return 
navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return 
e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
upgini/metadata.py CHANGED
@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Set
4
4
  from pydantic import BaseModel
5
5
 
6
6
  SYSTEM_RECORD_ID = "system_record_id"
7
+ ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
8
+ SEARCH_KEY_UNNEST = "search_key_unnest"
7
9
  SORT_ID = "sort_id"
8
10
  EVAL_SET_INDEX = "eval_set_index"
9
11
  TARGET = "target"
@@ -11,7 +13,7 @@ COUNTRY = "country_iso_code"
11
13
  RENAMED_INDEX = "index_col"
12
14
  DEFAULT_INDEX = "index"
13
15
  ORIGINAL_INDEX = "original_index"
14
- SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
16
+ SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
15
17
 
16
18
 
17
19
  class FileColumnMeaningType(Enum):
@@ -37,6 +39,8 @@ class FileColumnMeaningType(Enum):
37
39
  POSTAL_CODE = "POSTAL_CODE"
38
40
  SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
39
41
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
42
+ ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
43
+ UNNEST_KEY = "UNNEST_KEY"
40
44
 
41
45
 
42
46
  class SearchKey(Enum):
@@ -182,6 +186,10 @@ class FileColumnMetadata(BaseModel):
182
186
  meaningType: FileColumnMeaningType
183
187
  minMaxValues: Optional[NumericInterval] = None
184
188
  originalName: Optional[str]
189
+ # is this column contains keys from multiple key columns like msisdn1, msisdn2
190
+ isUnnest: bool = False,
191
+ # list of original etalon key column names like msisdn1, msisdn2
192
+ unnestKeyNames: Optional[list[str]]
185
193
 
186
194
 
187
195
  class FileMetadata(BaseModel):
@@ -87,6 +87,7 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
87
87
  search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaneously. Parameter country_code will be ignored
88
88
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
89
89
  single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
90
+ unsupported_multi_key=Search key {} cannot be used multiple times
90
91
  unsupported_index_column=\nWARNING: Your column with name `index` was dropped because this name is reserved for system needs.
91
92
  date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
92
93
  invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import List
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -10,16 +10,18 @@ class BaseSearchKeyDetector:
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
11
  raise NotImplementedError()
12
12
 
13
- def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
- for column_name in column_names:
15
- if self._is_search_key_by_name(column_name):
16
- return column_name
13
+ def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
+ return [
15
+ column_name
16
+ for column_name in column_names
17
+ if self._is_search_key_by_name(column_name)
18
+ ]
17
19
 
18
- def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
- maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
- if maybe_column is not None:
21
- return maybe_column
22
-
23
- for column_name in df.columns:
20
+ def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
+ other_columns = [col for col in df.columns if col not in existing_search_keys]
22
+ columns_by_names = self._get_search_keys_by_name(other_columns)
23
+ columns_by_values = []
24
+ for column_name in other_columns:
24
25
  if self._is_search_key_by_values(df[column_name]):
25
- return column_name
26
+ columns_by_values.append(column_name)
27
+ return list(set(columns_by_names + columns_by_values))
@@ -3,7 +3,15 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
6
+ from upgini.metadata import (
7
+ ENTITY_SYSTEM_RECORD_ID,
8
+ EVAL_SET_INDEX,
9
+ SORT_ID,
10
+ SYSTEM_RECORD_ID,
11
+ TARGET,
12
+ ModelTaskType,
13
+ SearchKey,
14
+ )
7
15
  from upgini.resource_bundle import ResourceBundle
8
16
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
9
17
  from upgini.utils.target_utils import define_task
@@ -143,6 +151,8 @@ def clean_full_duplicates(
143
151
  unique_columns = df.columns.tolist()
144
152
  if SYSTEM_RECORD_ID in unique_columns:
145
153
  unique_columns.remove(SYSTEM_RECORD_ID)
154
+ if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
+ unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
146
156
  if SORT_ID in unique_columns:
147
157
  unique_columns.remove(SORT_ID)
148
158
  if EVAL_SET_INDEX in unique_columns:
@@ -38,11 +38,13 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
+ unnest_search_keys: Optional[List[str]] = None,
41
42
  logger: Optional[logging.Logger] = None,
42
43
  ):
43
44
  self.email_column = email_column
44
45
  self.hem_column = hem_column
45
46
  self.search_keys = search_keys
47
+ self.unnest_search_keys = unnest_search_keys
46
48
  if logger is not None:
47
49
  self.logger = logger
48
50
  else:
@@ -80,9 +82,12 @@ class EmailSearchKeyConverter:
80
82
  del self.search_keys[self.email_column]
81
83
  return df
82
84
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
+ self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
83
86
  self.email_converted_to_hem = True
84
87
 
85
88
  del self.search_keys[self.email_column]
89
+ if self.email_column in self.unnest_search_keys:
90
+ self.unnest_search_keys.remove(self.email_column)
86
91
 
87
92
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
88
93
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.274a3388.post2
3
+ Version: 1.1.275a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -1,10 +1,11 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
- upgini/dataset.py,sha256=xb4gIANyGbdcuM8Awyq2pJPiH_3k_LEbETApJgAoRBA,45529
3
+ upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=LPYSCGq89WLaL5iQNikTyhICUs_APtqEvhn5XRENn1U,174105
5
+ upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
6
+ upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
6
7
  upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
7
- upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
8
+ upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
8
9
  upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
9
10
  upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
10
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
@@ -14,10 +15,10 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
14
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
16
  upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
16
17
  upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
17
- upgini/autofe/date.py,sha256=DreiGPDrPT5hThjmtp6_LOsByWV6FP0XlWhpPEYeEQo,4610
18
- upgini/autofe/feature.py,sha256=LAVXZL5zJEwZbuhB8Zqcrbev7yDvTveTnkEUKpMx21U,14434
18
+ upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
19
+ upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
19
20
  upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
20
- upgini/autofe/operand.py,sha256=1eF3-6-dW7MwD34LH-csTDN2xqcyFuIJbIUKU59bjB0,3012
21
+ upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
21
22
  upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
22
23
  upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
23
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,22 +29,22 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
28
29
  upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
29
30
  upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
30
31
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
31
- upgini/resource_bundle/strings.properties,sha256=TM9OykiEXNpcgFN3DpqBGbQs4N9m4mzHBn-k6aazc30,26111
32
+ upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
32
33
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
33
34
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
35
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
35
36
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
36
37
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
37
38
  upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
38
- upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
39
+ upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
39
40
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
40
41
  upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
41
42
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
42
43
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
43
44
  upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
44
- upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
45
+ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
45
46
  upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
46
- upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
47
+ upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
47
48
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
48
49
  upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
49
50
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
@@ -55,8 +56,8 @@ upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,4
55
56
  upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
56
57
  upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
57
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
58
- upgini-1.1.274a3388.post2.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
59
- upgini-1.1.274a3388.post2.dist-info/METADATA,sha256=vF30QjGCqiXg5iU8_Gp299EHu5uvlUEoN9e04VlpCSw,48167
60
- upgini-1.1.274a3388.post2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
61
- upgini-1.1.274a3388.post2.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
62
- upgini-1.1.274a3388.post2.dist-info/RECORD,,
59
+ upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
+ upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
61
+ upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
+ upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
+ upgini-1.1.275a1.dist-info/RECORD,,