upgini 1.1.278a2__py3-none-any.whl → 1.1.279a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/dataset.py CHANGED
@@ -23,9 +23,7 @@ from pandas.api.types import (
23
23
  from upgini.errors import ValidationError
24
24
  from upgini.http import ProgressStage, SearchProgress, _RestClient
25
25
  from upgini.metadata import (
26
- ENTITY_SYSTEM_RECORD_ID,
27
26
  EVAL_SET_INDEX,
28
- SEARCH_KEY_UNNEST,
29
27
  SYSTEM_COLUMNS,
30
28
  SYSTEM_RECORD_ID,
31
29
  TARGET,
@@ -81,7 +79,6 @@ class Dataset: # (pd.DataFrame):
81
79
  path: Optional[str] = None,
82
80
  meaning_types: Optional[Dict[str, FileColumnMeaningType]] = None,
83
81
  search_keys: Optional[List[Tuple[str, ...]]] = None,
84
- unnest_search_keys: Optional[Dict[str, str]] = None,
85
82
  model_task_type: Optional[ModelTaskType] = None,
86
83
  random_state: Optional[int] = None,
87
84
  rest_client: Optional[_RestClient] = None,
@@ -116,7 +113,6 @@ class Dataset: # (pd.DataFrame):
116
113
  self.description = description
117
114
  self.meaning_types = meaning_types
118
115
  self.search_keys = search_keys
119
- self.unnest_search_keys = unnest_search_keys
120
116
  self.ignore_columns = []
121
117
  self.hierarchical_group_keys = []
122
118
  self.hierarchical_subgroup_keys = []
@@ -176,7 +172,7 @@ class Dataset: # (pd.DataFrame):
176
172
  new_columns = []
177
173
  dup_counter = 0
178
174
  for column in self.data.columns:
179
- if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]:
175
+ if column in [TARGET, EVAL_SET_INDEX, SYSTEM_RECORD_ID]:
180
176
  self.columns_renaming[column] = column
181
177
  new_columns.append(column)
182
178
  continue
@@ -357,9 +353,7 @@ class Dataset: # (pd.DataFrame):
357
353
 
358
354
  if is_string_dtype(self.data[postal_code]) or is_object_dtype(self.data[postal_code]):
359
355
  try:
360
- self.data[postal_code] = (
361
- self.data[postal_code].astype("string").astype("Float64").astype("Int64").astype("string")
362
- )
356
+ self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
363
357
  except Exception:
364
358
  pass
365
359
  elif is_float_dtype(self.data[postal_code]):
@@ -809,9 +803,6 @@ class Dataset: # (pd.DataFrame):
809
803
  meaningType=meaning_type,
810
804
  minMaxValues=min_max_values,
811
805
  )
812
- if self.unnest_search_keys and column_meta.originalName in self.unnest_search_keys:
813
- column_meta.isUnnest = True
814
- column_meta.unnestKeyNames = self.unnest_search_keys[column_meta.originalName]
815
806
 
816
807
  columns.append(column_meta)
817
808
 
@@ -11,7 +11,6 @@ import sys
11
11
  import tempfile
12
12
  import time
13
13
  import uuid
14
- from collections import Counter
15
14
  from dataclasses import dataclass
16
15
  from threading import Thread
17
16
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -46,11 +45,9 @@ from upgini.mdc import MDC
46
45
  from upgini.metadata import (
47
46
  COUNTRY,
48
47
  DEFAULT_INDEX,
49
- ENTITY_SYSTEM_RECORD_ID,
50
48
  EVAL_SET_INDEX,
51
49
  ORIGINAL_INDEX,
52
50
  RENAMED_INDEX,
53
- SEARCH_KEY_UNNEST,
54
51
  SORT_ID,
55
52
  SYSTEM_RECORD_ID,
56
53
  TARGET,
@@ -251,7 +248,7 @@ class FeaturesEnricher(TransformerMixin):
251
248
  self.__cached_sampled_datasets: Optional[Tuple[pd.DataFrame, pd.DataFrame, pd.Series, Dict, Dict]] = None
252
249
 
253
250
  validate_version(self.logger)
254
- self.search_keys = search_keys or {}
251
+ self.search_keys = search_keys or dict()
255
252
  self.country_code = country_code
256
253
  self.__validate_search_keys(search_keys, search_id)
257
254
  self.model_task_type = model_task_type
@@ -1191,7 +1188,7 @@ class FeaturesEnricher(TransformerMixin):
1191
1188
  email_column = self._get_email_column(search_keys)
1192
1189
  hem_column = self._get_hem_column(search_keys)
1193
1190
  if email_column:
1194
- converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
1191
+ converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1195
1192
  extended_X = converter.convert(extended_X)
1196
1193
  generated_features.extend(converter.generated_features)
1197
1194
  if (
@@ -1343,7 +1340,7 @@ class FeaturesEnricher(TransformerMixin):
1343
1340
  not in (
1344
1341
  excluding_search_keys
1345
1342
  + list(self.fit_dropped_features)
1346
- + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1343
+ + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID]
1347
1344
  )
1348
1345
  ]
1349
1346
 
@@ -1407,7 +1404,7 @@ class FeaturesEnricher(TransformerMixin):
1407
1404
  fitting_enriched_X[col].astype("string").str.replace(",", ".").astype(np.float64)
1408
1405
  )
1409
1406
 
1410
- fitting_eval_set_dict = {}
1407
+ fitting_eval_set_dict = dict()
1411
1408
  for idx, eval_tuple in eval_set_sampled_dict.items():
1412
1409
  eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
1413
1410
  eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
@@ -1519,7 +1516,7 @@ class FeaturesEnricher(TransformerMixin):
1519
1516
  def __sample_only_input(
1520
1517
  self, validated_X: pd.DataFrame, validated_y: pd.Series, eval_set: Optional[List[tuple]], is_demo_dataset: bool
1521
1518
  ) -> _SampledDataForMetrics:
1522
- eval_set_sampled_dict = {}
1519
+ eval_set_sampled_dict = dict()
1523
1520
 
1524
1521
  df = validated_X.copy()
1525
1522
  df[TARGET] = validated_y
@@ -1545,7 +1542,7 @@ class FeaturesEnricher(TransformerMixin):
1545
1542
  df = df.sample(n=sample_rows, random_state=self.random_state)
1546
1543
 
1547
1544
  df_extended, search_keys = self._extend_x(df, is_demo_dataset)
1548
- df_extended = self.__add_fit_system_record_id(df_extended, {}, search_keys)
1545
+ df_extended = self.__add_fit_system_record_id(df_extended, dict(), search_keys)
1549
1546
 
1550
1547
  train_df = df_extended.query(f"{EVAL_SET_INDEX} == 0") if eval_set is not None else df_extended
1551
1548
  X_sampled = train_df.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
@@ -1569,7 +1566,7 @@ class FeaturesEnricher(TransformerMixin):
1569
1566
  trace_id: str,
1570
1567
  remove_outliers_calc_metrics: Optional[bool],
1571
1568
  ) -> _SampledDataForMetrics:
1572
- eval_set_sampled_dict = {}
1569
+ eval_set_sampled_dict = dict()
1573
1570
  search_keys = self.fit_search_keys
1574
1571
 
1575
1572
  rows_to_drop = None
@@ -1643,7 +1640,7 @@ class FeaturesEnricher(TransformerMixin):
1643
1640
  progress_bar: Optional[ProgressBar],
1644
1641
  progress_callback: Optional[Callable[[SearchProgress], Any]],
1645
1642
  ) -> _SampledDataForMetrics:
1646
- eval_set_sampled_dict = {}
1643
+ eval_set_sampled_dict = dict()
1647
1644
  if eval_set is not None:
1648
1645
  self.logger.info("Transform with eval_set")
1649
1646
  # concatenate X and eval_set with eval_set_index
@@ -1665,7 +1662,7 @@ class FeaturesEnricher(TransformerMixin):
1665
1662
  self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS}")
1666
1663
  df = df.sample(n=Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS, random_state=self.random_state)
1667
1664
 
1668
- eval_set_sampled_dict = {}
1665
+ eval_set_sampled_dict = dict()
1669
1666
 
1670
1667
  tmp_target_name = "__target"
1671
1668
  df = df.rename(columns={TARGET: tmp_target_name})
@@ -1928,38 +1925,11 @@ class FeaturesEnricher(TransformerMixin):
1928
1925
  self.logger.info("Input dataset hasn't date column")
1929
1926
  if self.add_date_if_missing:
1930
1927
  df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
1931
-
1932
- # Don't pass all features in backend on transform
1933
- original_features_for_transform = []
1934
- runtime_parameters = self._get_copy_of_runtime_parameters()
1935
- features_not_to_pass = [column for column in df.columns if column not in search_keys.keys()]
1936
- if len(features_not_to_pass) > 0:
1937
- # Pass only features that need for transform
1938
- features_for_transform = self._search_task.get_features_for_transform()
1939
- if features_for_transform is not None and len(features_for_transform) > 0:
1940
- file_metadata = self._search_task.get_file_metadata(trace_id)
1941
- original_features_for_transform = [
1942
- c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1943
- ]
1944
-
1945
- runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1946
-
1947
- columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1948
-
1949
- df[ENTITY_SYSTEM_RECORD_ID] = pd.util.hash_pandas_object(
1950
- df[columns_for_system_record_id], index=False
1951
- ).astype("Float64")
1952
-
1953
- # Explode multiple search keys
1954
- df, unnest_search_keys = self._explode_multiple_search_keys(df, search_keys)
1955
-
1956
1928
  email_column = self._get_email_column(search_keys)
1957
1929
  hem_column = self._get_hem_column(search_keys)
1958
1930
  email_converted_to_hem = False
1959
1931
  if email_column:
1960
- converter = EmailSearchKeyConverter(
1961
- email_column, hem_column, search_keys, list(unnest_search_keys.keys()), self.logger
1962
- )
1932
+ converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
1963
1933
  df = converter.convert(df)
1964
1934
  generated_features.extend(converter.generated_features)
1965
1935
  email_converted_to_hem = converter.email_converted_to_hem
@@ -1973,21 +1943,30 @@ class FeaturesEnricher(TransformerMixin):
1973
1943
  generated_features = [f for f in generated_features if f in self.fit_generated_features]
1974
1944
 
1975
1945
  meaning_types = {col: key.value for col, key in search_keys.items()}
1976
- # non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1977
- for col in original_features_for_transform:
1978
- meaning_types[col] = FileColumnMeaningType.FEATURE
1979
- features_not_to_pass = [column for column in features_not_to_pass if column not in search_keys.keys()]
1946
+ non_keys_columns = [column for column in df.columns if column not in search_keys.keys()]
1980
1947
 
1981
1948
  if email_converted_to_hem:
1982
- features_not_to_pass.append(email_column)
1949
+ non_keys_columns.append(email_column)
1950
+
1951
+ # Don't pass features in backend on transform
1952
+ original_features_for_transform = None
1953
+ runtime_parameters = self._get_copy_of_runtime_parameters()
1954
+ if len(non_keys_columns) > 0:
1955
+ # Pass only features that need for transform
1956
+ features_for_transform = self._search_task.get_features_for_transform()
1957
+ if features_for_transform is not None and len(features_for_transform) > 0:
1958
+ file_metadata = self._search_task.get_file_metadata(trace_id)
1959
+ original_features_for_transform = [
1960
+ c.originalName or c.name for c in file_metadata.columns if c.name in features_for_transform
1961
+ ]
1962
+ non_keys_columns = [c for c in non_keys_columns if c not in original_features_for_transform]
1983
1963
 
1984
- features_not_to_pass = [c for c in features_not_to_pass if c not in original_features_for_transform]
1985
- columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform))
1964
+ runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
1986
1965
 
1987
1966
  if add_fit_system_record_id:
1988
- df = self.__add_fit_system_record_id(df, {}, search_keys)
1967
+ df = self.__add_fit_system_record_id(df, dict(), search_keys)
1989
1968
  df = df.rename(columns={SYSTEM_RECORD_ID: SORT_ID})
1990
- features_not_to_pass.append(SORT_ID)
1969
+ non_keys_columns.append(SORT_ID)
1991
1970
 
1992
1971
  columns_for_system_record_id = sorted(list(search_keys.keys()) + (original_features_for_transform or []))
1993
1972
 
@@ -1995,19 +1974,16 @@ class FeaturesEnricher(TransformerMixin):
1995
1974
  "Float64"
1996
1975
  )
1997
1976
  meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
1998
- meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
1999
- if SEARCH_KEY_UNNEST in df.columns:
2000
- meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
2001
1977
 
2002
1978
  df = df.reset_index(drop=True)
2003
- system_columns_with_original_index = [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID] + generated_features
1979
+ system_columns_with_original_index = [SYSTEM_RECORD_ID] + generated_features
2004
1980
  if add_fit_system_record_id:
2005
1981
  system_columns_with_original_index.append(SORT_ID)
2006
1982
  df_with_original_index = df[system_columns_with_original_index].copy()
2007
1983
 
2008
1984
  combined_search_keys = combine_search_keys(search_keys.keys())
2009
1985
 
2010
- df_without_features = df.drop(columns=features_not_to_pass)
1986
+ df_without_features = df.drop(columns=non_keys_columns)
2011
1987
 
2012
1988
  df_without_features = clean_full_duplicates(
2013
1989
  df_without_features, self.logger, silent=silent_mode, bundle=self.bundle
@@ -2019,13 +1995,12 @@ class FeaturesEnricher(TransformerMixin):
2019
1995
  dataset = Dataset(
2020
1996
  "sample_" + str(uuid.uuid4()),
2021
1997
  df=df_without_features,
2022
- meaning_types=meaning_types,
2023
- search_keys=combined_search_keys,
2024
- unnest_search_keys=unnest_search_keys,
2025
1998
  date_format=self.date_format,
2026
1999
  rest_client=self.rest_client,
2027
2000
  logger=self.logger,
2028
2001
  )
2002
+ dataset.meaning_types = meaning_types
2003
+ dataset.search_keys = combined_search_keys
2029
2004
  if email_converted_to_hem:
2030
2005
  dataset.ignore_columns = [email_column]
2031
2006
 
@@ -2164,14 +2139,6 @@ class FeaturesEnricher(TransformerMixin):
2164
2139
 
2165
2140
  key_types = search_keys.values()
2166
2141
 
2167
- # Multiple search keys allowed only for PHONE, IP, POSTAL_CODE, EMAIL, HEM
2168
- multi_keys = [key for key, count in Counter(key_types).items() if count > 1]
2169
- for multi_key in multi_keys:
2170
- if multi_key not in [SearchKey.PHONE, SearchKey.IP, SearchKey.POSTAL_CODE, SearchKey.EMAIL, SearchKey.HEM]:
2171
- msg = self.bundle.get("unsupported_multi_key").format(multi_key)
2172
- self.logger.warning(msg)
2173
- raise ValidationError(msg)
2174
-
2175
2142
  if SearchKey.DATE in key_types and SearchKey.DATETIME in key_types:
2176
2143
  msg = self.bundle.get("date_and_datetime_simultanious")
2177
2144
  self.logger.warning(msg)
@@ -2187,11 +2154,11 @@ class FeaturesEnricher(TransformerMixin):
2187
2154
  self.logger.warning(msg)
2188
2155
  raise ValidationError(msg)
2189
2156
 
2190
- # for key_type in SearchKey.__members__.values():
2191
- # if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2192
- # msg = self.bundle.get("multiple_search_key").format(key_type)
2193
- # self.logger.warning(msg)
2194
- # raise ValidationError(msg)
2157
+ for key_type in SearchKey.__members__.values():
2158
+ if key_type != SearchKey.CUSTOM_KEY and list(key_types).count(key_type) > 1:
2159
+ msg = self.bundle.get("multiple_search_key").format(key_type)
2160
+ self.logger.warning(msg)
2161
+ raise ValidationError(msg)
2195
2162
 
2196
2163
  # non_personal_keys = set(SearchKey.__members__.values()) - set(SearchKey.personal_keys())
2197
2164
  # if (
@@ -2329,6 +2296,14 @@ class FeaturesEnricher(TransformerMixin):
2329
2296
  self.logger.info("Input dataset hasn't date column")
2330
2297
  if self.add_date_if_missing:
2331
2298
  df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2299
+ email_column = self._get_email_column(self.fit_search_keys)
2300
+ hem_column = self._get_hem_column(self.fit_search_keys)
2301
+ email_converted_to_hem = False
2302
+ if email_column:
2303
+ converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
2304
+ df = converter.convert(df)
2305
+ self.fit_generated_features.extend(converter.generated_features)
2306
+ email_converted_to_hem = converter.email_converted_to_hem
2332
2307
  if (
2333
2308
  self.detect_missing_search_keys
2334
2309
  and list(self.fit_search_keys.values()) == [SearchKey.DATE]
@@ -2337,37 +2312,7 @@ class FeaturesEnricher(TransformerMixin):
2337
2312
  converter = IpToCountrySearchKeyConverter(self.fit_search_keys, self.logger)
2338
2313
  df = converter.convert(df)
2339
2314
 
2340
- # Explode multiple search keys
2341
2315
  non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
2342
- meaning_types = {
2343
- **{col: key.value for col, key in self.fit_search_keys.items()},
2344
- **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2345
- }
2346
- meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2347
- if eval_set is not None and len(eval_set) > 0:
2348
- meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2349
- df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, ENTITY_SYSTEM_RECORD_ID)
2350
-
2351
- # TODO check that this is correct for enrichment
2352
- self.df_with_original_index = df.copy()
2353
-
2354
- df, unnest_search_keys = self._explode_multiple_search_keys(df, self.fit_search_keys)
2355
-
2356
- # Convert EMAIL to HEM after unnesting to do it only with one column
2357
- email_column = self._get_email_column(self.fit_search_keys)
2358
- hem_column = self._get_hem_column(self.fit_search_keys)
2359
- email_converted_to_hem = False
2360
- if email_column:
2361
- converter = EmailSearchKeyConverter(
2362
- email_column, hem_column, self.fit_search_keys, list(unnest_search_keys.keys()), self.logger
2363
- )
2364
- df = converter.convert(df)
2365
- self.fit_generated_features.extend(converter.generated_features)
2366
- email_converted_to_hem = converter.email_converted_to_hem
2367
-
2368
- non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
2369
- self.fit_search_keys.keys()
2370
- )
2371
2316
  if email_converted_to_hem:
2372
2317
  non_feature_columns.append(email_column)
2373
2318
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
@@ -2391,14 +2336,12 @@ class FeaturesEnricher(TransformerMixin):
2391
2336
  **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2392
2337
  }
2393
2338
  meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2394
- meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
2395
- if SEARCH_KEY_UNNEST in df.columns:
2396
- meaning_types[SEARCH_KEY_UNNEST] = FileColumnMeaningType.UNNEST_KEY
2397
2339
  if eval_set is not None and len(eval_set) > 0:
2398
2340
  meaning_types[EVAL_SET_INDEX] = FileColumnMeaningType.EVAL_SET_INDEX
2399
2341
 
2400
- df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys, SYSTEM_RECORD_ID)
2342
+ df = self.__add_fit_system_record_id(df, meaning_types, self.fit_search_keys)
2401
2343
 
2344
+ self.df_with_original_index = df.copy()
2402
2345
  df = df.reset_index(drop=True).sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
2403
2346
 
2404
2347
  combined_search_keys = combine_search_keys(self.fit_search_keys.keys())
@@ -2406,15 +2349,14 @@ class FeaturesEnricher(TransformerMixin):
2406
2349
  dataset = Dataset(
2407
2350
  "tds_" + str(uuid.uuid4()),
2408
2351
  df=df,
2409
- meaning_types=meaning_types,
2410
- search_keys=combined_search_keys,
2411
- unnest_search_keys=unnest_search_keys,
2412
2352
  model_task_type=model_task_type,
2413
2353
  date_format=self.date_format,
2414
2354
  random_state=self.random_state,
2415
2355
  rest_client=self.rest_client,
2416
2356
  logger=self.logger,
2417
2357
  )
2358
+ dataset.meaning_types = meaning_types
2359
+ dataset.search_keys = combined_search_keys
2418
2360
  if email_converted_to_hem:
2419
2361
  dataset.ignore_columns = [email_column]
2420
2362
 
@@ -2784,10 +2726,9 @@ class FeaturesEnricher(TransformerMixin):
2784
2726
  X: pd.DataFrame, y: pd.Series, cv: Optional[CVType]
2785
2727
  ) -> Tuple[pd.DataFrame, pd.Series]:
2786
2728
  if cv not in [CVType.time_series, CVType.blocked_time_series]:
2787
- record_id_column = ENTITY_SYSTEM_RECORD_ID if ENTITY_SYSTEM_RECORD_ID in X else SYSTEM_RECORD_ID
2788
2729
  Xy = X.copy()
2789
2730
  Xy[TARGET] = y
2790
- Xy = Xy.sort_values(by=record_id_column).reset_index(drop=True)
2731
+ Xy = Xy.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True)
2791
2732
  X = Xy.drop(columns=TARGET)
2792
2733
  y = Xy[TARGET].copy()
2793
2734
 
@@ -2964,19 +2905,15 @@ class FeaturesEnricher(TransformerMixin):
2964
2905
 
2965
2906
  @staticmethod
2966
2907
  def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2967
- cols = [col for col, t in search_keys.items() if t == SearchKey.EMAIL]
2968
- if len(cols) > 1:
2969
- raise Exception("More than one email column found after unnest")
2970
- if len(cols) == 1:
2971
- return cols[0]
2908
+ for col, t in search_keys.items():
2909
+ if t == SearchKey.EMAIL:
2910
+ return col
2972
2911
 
2973
2912
  @staticmethod
2974
2913
  def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
2975
- cols = [col for col, t in search_keys.items() if t == SearchKey.HEM]
2976
- if len(cols) > 1:
2977
- raise Exception("More than one hem column found after unnest")
2978
- if len(cols) == 1:
2979
- return cols[0]
2914
+ for col, t in search_keys.items():
2915
+ if t == SearchKey.HEM:
2916
+ return col
2980
2917
 
2981
2918
  @staticmethod
2982
2919
  def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
@@ -2984,44 +2921,8 @@ class FeaturesEnricher(TransformerMixin):
2984
2921
  if t == SearchKey.PHONE:
2985
2922
  return col
2986
2923
 
2987
- def _explode_multiple_search_keys(
2988
- self, df: pd.DataFrame, search_keys: Dict[str, SearchKey]
2989
- ) -> Tuple[pd.DataFrame, Dict[str, List[str]]]:
2990
- # find groups of multiple search keys
2991
- search_key_names_by_type: Dict[SearchKey, str] = {}
2992
- for key_name, key_type in search_keys.items():
2993
- search_key_names_by_type[key_type] = search_key_names_by_type.get(key_type, []) + [key_name]
2994
- search_key_names_by_type = {
2995
- key_type: key_names for key_type, key_names in search_key_names_by_type.items() if len(key_names) > 1
2996
- }
2997
- if len(search_key_names_by_type) == 0:
2998
- return df, {}
2999
-
3000
- multiple_keys_columns = [col for cols in search_key_names_by_type.values() for col in cols]
3001
- other_columns = [col for col in df.columns if col not in multiple_keys_columns]
3002
- exploded_dfs = []
3003
- unnest_search_keys = {}
3004
-
3005
- for key_type, key_names in search_key_names_by_type.items():
3006
- new_search_key = f"upgini_{key_type.name.lower()}_unnest"
3007
- exploded_df = pd.melt(
3008
- df, id_vars=other_columns, value_vars=key_names, var_name=SEARCH_KEY_UNNEST, value_name=new_search_key
3009
- )
3010
- exploded_dfs.append(exploded_df)
3011
- for old_key in key_names:
3012
- del search_keys[old_key]
3013
- search_keys[new_search_key] = key_type
3014
- unnest_search_keys[new_search_key] = key_names
3015
-
3016
- df = pd.concat(exploded_dfs, ignore_index=True)
3017
- return df, unnest_search_keys
3018
-
3019
2924
  def __add_fit_system_record_id(
3020
- self,
3021
- df: pd.DataFrame,
3022
- meaning_types: Dict[str, FileColumnMeaningType],
3023
- search_keys: Dict[str, SearchKey],
3024
- id_name: str,
2925
+ self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
3025
2926
  ) -> pd.DataFrame:
3026
2927
  # save original order or rows
3027
2928
  original_index_name = df.index.name
@@ -3070,18 +2971,14 @@ class FeaturesEnricher(TransformerMixin):
3070
2971
 
3071
2972
  df = df.reset_index(drop=True).reset_index()
3072
2973
  # system_record_id saves correct order for fit
3073
- df = df.rename(columns={DEFAULT_INDEX: id_name})
2974
+ df = df.rename(columns={DEFAULT_INDEX: SYSTEM_RECORD_ID})
3074
2975
 
3075
2976
  # return original order
3076
2977
  df = df.set_index(ORIGINAL_INDEX)
3077
2978
  df.index.name = original_index_name
3078
2979
  df = df.sort_values(by=original_order_name).drop(columns=original_order_name)
3079
2980
 
3080
- meaning_types[id_name] = (
3081
- FileColumnMeaningType.SYSTEM_RECORD_ID
3082
- if id_name == SYSTEM_RECORD_ID
3083
- else FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
3084
- )
2981
+ meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
3085
2982
  return df
3086
2983
 
3087
2984
  def __correct_target(self, df: pd.DataFrame) -> pd.DataFrame:
@@ -3136,11 +3033,7 @@ class FeaturesEnricher(TransformerMixin):
3136
3033
  )
3137
3034
 
3138
3035
  comparing_columns = X.columns if is_transform else df_with_original_index.columns
3139
- dup_features = [
3140
- c
3141
- for c in comparing_columns
3142
- if c in result_features.columns and c not in [SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
3143
- ]
3036
+ dup_features = [c for c in comparing_columns if c in result_features.columns and c != SYSTEM_RECORD_ID]
3144
3037
  if len(dup_features) > 0:
3145
3038
  self.logger.warning(f"X contain columns with same name as returned from backend: {dup_features}")
3146
3039
  raise ValidationError(self.bundle.get("returned_features_same_as_passed").format(dup_features))
@@ -3151,7 +3044,8 @@ class FeaturesEnricher(TransformerMixin):
3151
3044
  result_features = pd.merge(
3152
3045
  df_with_original_index,
3153
3046
  result_features,
3154
- on=ENTITY_SYSTEM_RECORD_ID,
3047
+ left_on=SYSTEM_RECORD_ID,
3048
+ right_on=SYSTEM_RECORD_ID,
3155
3049
  how="left" if is_transform else "inner",
3156
3050
  )
3157
3051
  result_features = result_features.set_index(original_index_name or DEFAULT_INDEX)
@@ -3162,7 +3056,7 @@ class FeaturesEnricher(TransformerMixin):
3162
3056
  result_features = result_features[~result_features[SYSTEM_RECORD_ID].isin(rows_to_drop[SYSTEM_RECORD_ID])]
3163
3057
  self.logger.info(f"After dropping target outliers size: {len(result_features)}")
3164
3058
 
3165
- result_eval_sets = {}
3059
+ result_eval_sets = dict()
3166
3060
  if not is_transform and EVAL_SET_INDEX in result_features.columns:
3167
3061
  result_train_features = result_features.loc[result_features[EVAL_SET_INDEX] == 0].copy()
3168
3062
  eval_set_indices = list(result_features[EVAL_SET_INDEX].unique())
@@ -3368,7 +3262,7 @@ class FeaturesEnricher(TransformerMixin):
3368
3262
  if autofe_feature.op.is_vector:
3369
3263
  continue
3370
3264
 
3371
- description = {}
3265
+ description = dict()
3372
3266
 
3373
3267
  feature_meta = get_feature_by_name(autofe_feature.get_display_name(shorten=True))
3374
3268
  if feature_meta is None:
@@ -3534,13 +3428,13 @@ class FeaturesEnricher(TransformerMixin):
3534
3428
  self.warning_counter.increment()
3535
3429
 
3536
3430
  if len(valid_search_keys) == 1:
3537
- key, value = list(valid_search_keys.items())[0]
3538
- # Show warning for country only if country is the only key
3539
- if x[key].nunique() == 1:
3540
- msg = self.bundle.get("single_constant_search_key").format(value, x[key].values[0])
3541
- print(msg)
3542
- self.logger.warning(msg)
3543
- self.warning_counter.increment()
3431
+ for k, v in valid_search_keys.items():
3432
+ # Show warning for country only if country is the only key
3433
+ if x[k].nunique() == 1 and (v != SearchKey.COUNTRY or len(valid_search_keys) == 1):
3434
+ msg = self.bundle.get("single_constant_search_key").format(v, x[k].values[0])
3435
+ print(msg)
3436
+ self.logger.warning(msg)
3437
+ self.warning_counter.increment()
3544
3438
 
3545
3439
  self.logger.info(f"Prepared search keys: {valid_search_keys}")
3546
3440
 
@@ -3650,68 +3544,61 @@ class FeaturesEnricher(TransformerMixin):
3650
3544
  def check_need_detect(search_key: SearchKey):
3651
3545
  return not is_transform or search_key in self.fit_search_keys.values()
3652
3546
 
3653
- # if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3654
- if check_need_detect(SearchKey.POSTAL_CODE):
3655
- maybe_keys = PostalCodeSearchKeyDetector().get_search_key_columns(sample, search_keys)
3656
- if maybe_keys:
3657
- new_keys = {key: SearchKey.POSTAL_CODE for key in maybe_keys}
3658
- search_keys.update(new_keys)
3659
- self.autodetected_search_keys.update(new_keys)
3660
- self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_keys}")
3547
+ if SearchKey.POSTAL_CODE not in search_keys.values() and check_need_detect(SearchKey.POSTAL_CODE):
3548
+ maybe_key = PostalCodeSearchKeyDetector().get_search_key_column(sample)
3549
+ if maybe_key is not None:
3550
+ search_keys[maybe_key] = SearchKey.POSTAL_CODE
3551
+ self.autodetected_search_keys[maybe_key] = SearchKey.POSTAL_CODE
3552
+ self.logger.info(f"Autodetected search key POSTAL_CODE in column {maybe_key}")
3661
3553
  if not silent_mode:
3662
- print(self.bundle.get("postal_code_detected").format(maybe_keys))
3554
+ print(self.bundle.get("postal_code_detected").format(maybe_key))
3663
3555
 
3664
3556
  if (
3665
3557
  SearchKey.COUNTRY not in search_keys.values()
3666
3558
  and self.country_code is None
3667
3559
  and check_need_detect(SearchKey.COUNTRY)
3668
3560
  ):
3669
- maybe_key = CountrySearchKeyDetector().get_search_key_columns(sample, search_keys)
3670
- if maybe_key:
3671
- search_keys[maybe_key[0]] = SearchKey.COUNTRY
3672
- self.autodetected_search_keys[maybe_key[0]] = SearchKey.COUNTRY
3561
+ maybe_key = CountrySearchKeyDetector().get_search_key_column(sample)
3562
+ if maybe_key is not None:
3563
+ search_keys[maybe_key] = SearchKey.COUNTRY
3564
+ self.autodetected_search_keys[maybe_key] = SearchKey.COUNTRY
3673
3565
  self.logger.info(f"Autodetected search key COUNTRY in column {maybe_key}")
3674
3566
  if not silent_mode:
3675
3567
  print(self.bundle.get("country_detected").format(maybe_key))
3676
3568
 
3677
3569
  if (
3678
- # SearchKey.EMAIL not in search_keys.values()
3679
- SearchKey.HEM not in search_keys.values()
3570
+ SearchKey.EMAIL not in search_keys.values()
3571
+ and SearchKey.HEM not in search_keys.values()
3680
3572
  and check_need_detect(SearchKey.HEM)
3681
3573
  ):
3682
- maybe_keys = EmailSearchKeyDetector().get_search_key_columns(sample, search_keys)
3683
- if maybe_keys:
3574
+ maybe_key = EmailSearchKeyDetector().get_search_key_column(sample)
3575
+ if maybe_key is not None and maybe_key not in search_keys.keys():
3684
3576
  if self.__is_registered or is_demo_dataset:
3685
- new_keys = {key: SearchKey.EMAIL for key in maybe_keys}
3686
- search_keys.update(new_keys)
3687
- self.autodetected_search_keys.update(new_keys)
3688
- self.logger.info(f"Autodetected search key EMAIL in column {maybe_keys}")
3577
+ search_keys[maybe_key] = SearchKey.EMAIL
3578
+ self.autodetected_search_keys[maybe_key] = SearchKey.EMAIL
3579
+ self.logger.info(f"Autodetected search key EMAIL in column {maybe_key}")
3689
3580
  if not silent_mode:
3690
- print(self.bundle.get("email_detected").format(maybe_keys))
3581
+ print(self.bundle.get("email_detected").format(maybe_key))
3691
3582
  else:
3692
3583
  self.logger.warning(
3693
- f"Autodetected search key EMAIL in column {maybe_keys}."
3694
- " But not used because not registered user"
3584
+ f"Autodetected search key EMAIL in column {maybe_key}. But not used because not registered user"
3695
3585
  )
3696
3586
  if not silent_mode:
3697
- print(self.bundle.get("email_detected_not_registered").format(maybe_keys))
3587
+ print(self.bundle.get("email_detected_not_registered").format(maybe_key))
3698
3588
  self.warning_counter.increment()
3699
3589
 
3700
- # if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3701
- if check_need_detect(SearchKey.PHONE):
3702
- maybe_keys = PhoneSearchKeyDetector().get_search_key_columns(sample, search_keys)
3703
- if maybe_keys:
3590
+ if SearchKey.PHONE not in search_keys.values() and check_need_detect(SearchKey.PHONE):
3591
+ maybe_key = PhoneSearchKeyDetector().get_search_key_column(sample)
3592
+ if maybe_key is not None and maybe_key not in search_keys.keys():
3704
3593
  if self.__is_registered or is_demo_dataset:
3705
- new_keys = {key: SearchKey.PHONE for key in maybe_keys}
3706
- search_keys.update(new_keys)
3707
- self.autodetected_search_keys.update(new_keys)
3708
- self.logger.info(f"Autodetected search key PHONE in column {maybe_keys}")
3594
+ search_keys[maybe_key] = SearchKey.PHONE
3595
+ self.autodetected_search_keys[maybe_key] = SearchKey.PHONE
3596
+ self.logger.info(f"Autodetected search key PHONE in column {maybe_key}")
3709
3597
  if not silent_mode:
3710
- print(self.bundle.get("phone_detected").format(maybe_keys))
3598
+ print(self.bundle.get("phone_detected").format(maybe_key))
3711
3599
  else:
3712
3600
  self.logger.warning(
3713
- f"Autodetected search key PHONE in column {maybe_keys}. "
3714
- "But not used because not registered user"
3601
+ f"Autodetected search key PHONE in column {maybe_key}. But not used because not registered user"
3715
3602
  )
3716
3603
  if not silent_mode:
3717
3604
  print(self.bundle.get("phone_detected_not_registered"))
upgini/fingerprint.js ADDED
@@ -0,0 +1,8 @@
1
+ /**
2
+ * FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
3
+ * Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
4
+ *
5
+ * This software contains code from open-source projects:
6
+ * MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
7
+ */
8
+ var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="leaderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocmVmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
upgini/metadata.py CHANGED
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Set
4
4
  from pydantic import BaseModel
5
5
 
6
6
  SYSTEM_RECORD_ID = "system_record_id"
7
- ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
8
- SEARCH_KEY_UNNEST = "search_key_unnest"
9
7
  SORT_ID = "sort_id"
10
8
  EVAL_SET_INDEX = "eval_set_index"
11
9
  TARGET = "target"
@@ -13,7 +11,7 @@ COUNTRY = "country_iso_code"
13
11
  RENAMED_INDEX = "index_col"
14
12
  DEFAULT_INDEX = "index"
15
13
  ORIGINAL_INDEX = "original_index"
16
- SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
14
+ SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
17
15
 
18
16
 
19
17
  class FileColumnMeaningType(Enum):
@@ -39,8 +37,6 @@ class FileColumnMeaningType(Enum):
39
37
  POSTAL_CODE = "POSTAL_CODE"
40
38
  SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
41
39
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
42
- ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
43
- UNNEST_KEY = "UNNEST_KEY"
44
40
 
45
41
 
46
42
  class SearchKey(Enum):
@@ -186,10 +182,6 @@ class FileColumnMetadata(BaseModel):
186
182
  meaningType: FileColumnMeaningType
187
183
  minMaxValues: Optional[NumericInterval] = None
188
184
  originalName: Optional[str]
189
- # is this column contains keys from multiple key columns like msisdn1, msisdn2
190
- isUnnest: bool = False
191
- # list of original etalon key column names like msisdn1, msisdn2
192
- unnestKeyNames: Optional[list[str]]
193
185
 
194
186
 
195
187
  class FileMetadata(BaseModel):
@@ -284,7 +276,7 @@ class FeaturesFilter(BaseModel):
284
276
 
285
277
 
286
278
  class RuntimeParameters(BaseModel):
287
- properties: Dict[str, str] = {}
279
+ properties: Dict[str, str] = dict()
288
280
 
289
281
 
290
282
  class SearchCustomization(BaseModel):
upgini/metrics.py CHANGED
@@ -357,7 +357,7 @@ class EstimatorWrapper:
357
357
  "logger": logger,
358
358
  }
359
359
  if estimator is None:
360
- params = {}
360
+ params = dict()
361
361
  # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
362
362
  # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
363
363
  if target_type == ModelTaskType.MULTICLASS:
@@ -88,7 +88,6 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
88
88
  search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
89
89
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
90
90
  single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
91
- unsupported_multi_key=Search key {} cannot be used multiple times
92
91
  unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
93
92
  date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
94
93
  invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -10,18 +10,16 @@ class BaseSearchKeyDetector:
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
11
  raise NotImplementedError()
12
12
 
13
- def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
- return [
15
- column_name
16
- for column_name in column_names
17
- if self._is_search_key_by_name(column_name)
18
- ]
13
+ def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
+ for column_name in column_names:
15
+ if self._is_search_key_by_name(column_name):
16
+ return column_name
19
17
 
20
- def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
- other_columns = [col for col in df.columns if col not in existing_search_keys]
22
- columns_by_names = self._get_search_keys_by_name(other_columns)
23
- columns_by_values = []
24
- for column_name in other_columns:
18
+ def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
+ maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
+ if maybe_column is not None:
21
+ return maybe_column
22
+
23
+ for column_name in df.columns:
25
24
  if self._is_search_key_by_values(df[column_name]):
26
- columns_by_values.append(column_name)
27
- return list(set(columns_by_names + columns_by_values))
25
+ return column_name
@@ -208,14 +208,13 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
208
208
  if nunique_dates / days_delta < 0.3:
209
209
  return False
210
210
 
211
- def check_differences(group):
212
- data = group.drop(date_col, axis=1)
213
- diffs = data.values[:, None] != data.values
214
- diff_counts = diffs.sum(axis=2)
215
- max_diff = np.max(diff_counts)
216
- return max_diff <= 2
217
-
218
- def is_multiple_rows(group):
211
+ accumulated_changing_columns = set()
212
+
213
+ def check_differences(group: pd.DataFrame):
214
+ changing_columns = group.columns[group.nunique(dropna=False) > 1].to_list()
215
+ accumulated_changing_columns.update(changing_columns)
216
+
217
+ def is_multiple_rows(group: pd.DataFrame) -> bool:
219
218
  return group.shape[0] > 1
220
219
 
221
220
  grouped = df.groupby(date_col)
@@ -228,8 +227,8 @@ def is_blocked_time_series(df: pd.DataFrame, date_col: str, search_keys: List[st
228
227
  if df.shape[1] <= 3:
229
228
  return True
230
229
 
231
- is_diff_less_than_two_columns = grouped.apply(check_differences)
232
- return is_diff_less_than_two_columns.all()
230
+ grouped.apply(check_differences, include_groups=False)
231
+ return len(accumulated_changing_columns) <= 2
233
232
 
234
233
 
235
234
  def validate_dates_distribution(
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import (
7
- ENTITY_SYSTEM_RECORD_ID,
8
- EVAL_SET_INDEX,
9
- SORT_ID,
10
- SYSTEM_RECORD_ID,
11
- TARGET,
12
- ModelTaskType,
13
- SearchKey,
14
- )
6
+ from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
15
7
  from upgini.resource_bundle import ResourceBundle
16
8
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
17
9
  from upgini.utils.target_utils import define_task
@@ -151,8 +143,6 @@ def clean_full_duplicates(
151
143
  unique_columns = df.columns.tolist()
152
144
  if SYSTEM_RECORD_ID in unique_columns:
153
145
  unique_columns.remove(SYSTEM_RECORD_ID)
154
- if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
- unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
156
146
  if SORT_ID in unique_columns:
157
147
  unique_columns.remove(SORT_ID)
158
148
  if EVAL_SET_INDEX in unique_columns:
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
- unnest_search_keys: Optional[List[str]] = None,
42
41
  logger: Optional[logging.Logger] = None,
43
42
  ):
44
43
  self.email_column = email_column
45
44
  self.hem_column = hem_column
46
45
  self.search_keys = search_keys
47
- self.unnest_search_keys = unnest_search_keys
48
46
  if logger is not None:
49
47
  self.logger = logger
50
48
  else:
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
82
80
  del self.search_keys[self.email_column]
83
81
  return df
84
82
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
- self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
86
83
  self.email_converted_to_hem = True
87
84
 
88
85
  del self.search_keys[self.email_column]
89
- if self.email_column in self.unnest_search_keys:
90
- self.unnest_search_keys.remove(self.email_column)
91
86
 
92
87
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
93
88
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.278a2
3
+ Version: 1.1.279a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -1,11 +1,12 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
2
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
3
- upgini/dataset.py,sha256=qdIxHiDGZT_iNTBswNeIuc9TPfvUlNqvSmRqMyigZBM,46187
3
+ upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=i6Peb4ws4IyZNRKPj8tO8gO-RI1K2xfLX9zDqkNH0bQ,181799
5
+ upgini/features_enricher.py,sha256=ys7RQoZsyY8-NkUZyp12K8z5aQmg7pyx0LtwclFtXkc,176358
6
+ upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
6
7
  upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
7
- upgini/metadata.py,sha256=TNZbtIuxYkBFGQu3gGm2flA6vsKyUPN4Q-Du3fFjmSM,10101
8
- upgini/metrics.py,sha256=YhyPik38cBI5x5KfdiE_qocJnUjZbSqUj8GUtCqnG0g,29648
8
+ upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
9
+ upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
9
10
  upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
10
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
11
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -28,22 +29,22 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
28
29
  upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
29
30
  upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
30
31
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
31
- upgini/resource_bundle/strings.properties,sha256=-JDIa0nAoA5utK7UZZAUgLDsozJNI08dDcbIaOSsvQg,26353
32
+ upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
32
33
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
33
34
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
35
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
35
36
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
36
37
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
37
38
  upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
38
- upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
39
+ upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
39
40
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
40
41
  upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
41
42
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
42
43
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
43
- upgini/utils/datetime_utils.py,sha256=RW9eGCGQyYBsIU9XbYKt4hQiXUNppb4Grszg4EdKeY4,10398
44
- upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
44
+ upgini/utils/datetime_utils.py,sha256=La3jQSkc1cdFAm6KcSAOWKg6-n7rFzTlDjMONxm45YM,10411
45
+ upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
45
46
  upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
46
- upgini/utils/email_utils.py,sha256=KHqIUagBWd3jOj3V7mW0ZkBOc-2XzAIA3p1xxZgy-L4,3813
47
+ upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
47
48
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
48
49
  upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
49
50
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
@@ -55,8 +56,8 @@ upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,4
55
56
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
56
57
  upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
57
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
58
- upgini-1.1.278a2.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
59
- upgini-1.1.278a2.dist-info/METADATA,sha256=Ru3Yqgq1AgTr2H-1cupVIsnwAX0pqta2q4fLVd6kdHc,48158
60
- upgini-1.1.278a2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
61
- upgini-1.1.278a2.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
62
- upgini-1.1.278a2.dist-info/RECORD,,
59
+ upgini-1.1.279a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
+ upgini-1.1.279a1.dist-info/METADATA,sha256=tcdQ86ByFS4oZKHAS_DPGVUATTQo0JKDYB6Lw7E_oR4,48158
61
+ upgini-1.1.279a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
+ upgini-1.1.279a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
+ upgini-1.1.279a1.dist-info/RECORD,,