upgini 1.1.266a3254.post1__tar.gz → 1.1.267__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {upgini-1.1.266a3254.post1/src/upgini.egg-info → upgini-1.1.267}/PKG-INFO +1 -1
  2. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/setup.py +1 -1
  3. upgini-1.1.267/src/upgini/autofe/date.py +53 -0
  4. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/features_enricher.py +38 -6
  5. upgini-1.1.267/src/upgini/fingerprint.js +8 -0
  6. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/resource_bundle/strings.properties +4 -2
  7. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/target_utils.py +18 -0
  8. {upgini-1.1.266a3254.post1 → upgini-1.1.267/src/upgini.egg-info}/PKG-INFO +1 -1
  9. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini.egg-info/SOURCES.txt +1 -0
  10. upgini-1.1.267/tests/test_autofe_operands.py +27 -0
  11. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_features_enricher.py +32 -7
  12. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_metrics.py +36 -36
  13. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_target_utils.py +61 -1
  14. upgini-1.1.266a3254.post1/src/upgini/autofe/date.py +0 -109
  15. upgini-1.1.266a3254.post1/tests/test_autofe_operands.py +0 -93
  16. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/LICENSE +0 -0
  17. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/README.md +0 -0
  18. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/pyproject.toml +0 -0
  19. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/setup.cfg +0 -0
  20. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/__init__.py +0 -0
  21. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/ads.py +0 -0
  22. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/ads_management/__init__.py +0 -0
  23. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/ads_management/ads_manager.py +0 -0
  24. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/__init__.py +0 -0
  25. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/all_operands.py +0 -0
  26. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/binary.py +0 -0
  27. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/feature.py +0 -0
  28. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/groupby.py +0 -0
  29. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/operand.py +0 -0
  30. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/unary.py +0 -0
  31. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/dataset.py +0 -0
  35. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/errors.py +0 -0
  36. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/http.py +0 -0
  37. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/metadata.py +0 -0
  40. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/metrics.py +0 -0
  41. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/normalizer/phone_normalizer.py +0 -0
  43. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/search_task.py +0 -0
  51. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/spinner.py +0 -0
  52. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/__init__.py +0 -0
  53. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/base_search_key_detector.py +0 -0
  54. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/blocked_time_series.py +0 -0
  55. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/country_utils.py +0 -0
  56. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/custom_loss_utils.py +0 -0
  57. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/cv_utils.py +0 -0
  58. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/datetime_utils.py +0 -0
  59. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/deduplicate_utils.py +0 -0
  60. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/display_utils.py +0 -0
  61. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/email_utils.py +0 -0
  62. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/fallback_progress_bar.py +0 -0
  63. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/features_validator.py +0 -0
  64. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/format.py +0 -0
  65. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/ip_utils.py +0 -0
  66. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/phone_utils.py +0 -0
  67. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/postal_code_utils.py +0 -0
  68. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/progress_bar.py +0 -0
  69. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/sklearn_ext.py +0 -0
  70. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/track_info.py +0 -0
  71. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/utils/warning_counter.py +0 -0
  72. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini/version_validator.py +0 -0
  73. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini.egg-info/dependency_links.txt +0 -0
  74. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini.egg-info/requires.txt +0 -0
  75. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/src/upgini.egg-info/top_level.txt +0 -0
  76. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_binary_dataset.py +0 -0
  77. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_blocked_time_series.py +0 -0
  78. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_categorical_dataset.py +0 -0
  79. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_continuous_dataset.py +0 -0
  80. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_country_utils.py +0 -0
  81. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_custom_loss_utils.py +0 -0
  82. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_datetime_utils.py +0 -0
  83. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_email_utils.py +0 -0
  84. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_etalon_validation.py +0 -0
  85. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_phone_utils.py +0 -0
  86. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_postal_code_utils.py +0 -0
  87. {upgini-1.1.266a3254.post1 → upgini-1.1.267}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.266a3254.post1
3
+ Version: 1.1.267
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.266a3254-1"
43
+ version = "1.1.267"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -0,0 +1,53 @@
1
+ from typing import Optional, Union
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from upgini.autofe.operand import PandasOperand
6
+
7
+
8
+ class DateDiffMixin:
9
+ diff_unit: str = "D"
10
+ left_unit: Optional[str] = None
11
+ right_unit: Optional[str] = None
12
+
13
+ def _convert_to_date(
14
+ self, x: Union[pd.DataFrame, pd.Series], unit: Optional[str]
15
+ ) -> Union[pd.DataFrame, pd.Series]:
16
+ if isinstance(x, pd.DataFrame):
17
+ return x.apply(lambda y: self._convert_to_date(y, unit), axis=1)
18
+
19
+ return pd.to_datetime(x, unit=unit)
20
+
21
+
22
+ class DateDiff(PandasOperand, DateDiffMixin):
23
+ name = "date_diff"
24
+ is_binary = True
25
+ has_symmetry_importance = True
26
+
27
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
28
+ left = self._convert_to_date(left, self.left_unit)
29
+ right = self._convert_to_date(right, self.right_unit)
30
+ return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
31
+
32
+ def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
33
+ x[x < 0] = None
34
+ return x
35
+
36
+
37
+ class DateDiffType2(PandasOperand, DateDiffMixin):
38
+ name = "date_diff_type2"
39
+ is_binary = True
40
+ has_symmetry_importance = True
41
+ is_vectorizable = False
42
+
43
+ def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
44
+ left = self._convert_to_date(left, self.left_unit)
45
+ right = self._convert_to_date(right, self.right_unit)
46
+ future = right + (left.dt.year - right.dt.year).apply(
47
+ lambda y: np.datetime64("NaT") if np.isnan(y) else pd.tseries.offsets.DateOffset(years=y)
48
+ )
49
+ before = future[future < left]
50
+ future[future < left] = before + pd.tseries.offsets.DateOffset(years=1)
51
+ diff = (future - left) / np.timedelta64(1, self.diff_unit)
52
+
53
+ return diff
@@ -94,7 +94,7 @@ try:
94
94
  except Exception:
95
95
  from upgini.utils.fallback_progress_bar import CustomFallbackProgressBar as ProgressBar
96
96
 
97
- from upgini.utils.target_utils import define_task
97
+ from upgini.utils.target_utils import calculate_psi, define_task
98
98
  from upgini.utils.warning_counter import WarningCounter
99
99
  from upgini.version_validator import validate_version
100
100
 
@@ -2226,14 +2226,11 @@ class FeaturesEnricher(TransformerMixin):
2226
2226
  validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter
2227
2227
  )
2228
2228
 
2229
- has_date = self._get_date_column(self.fit_search_keys) is not None
2229
+ maybe_date_column = self._get_date_column(self.fit_search_keys)
2230
+ has_date = maybe_date_column is not None
2230
2231
  model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
2231
2232
  self._validate_binary_observations(validated_y, model_task_type)
2232
2233
 
2233
- df = self.__handle_index_search_keys(df, self.fit_search_keys)
2234
-
2235
- df = self.__correct_target(df)
2236
-
2237
2234
  self.runtime_parameters = get_runtime_params_custom_loss(
2238
2235
  self.loss, model_task_type, self.runtime_parameters, self.logger
2239
2236
  )
@@ -2245,6 +2242,13 @@ class FeaturesEnricher(TransformerMixin):
2245
2242
  eval_df[EVAL_SET_INDEX] = idx + 1
2246
2243
  df = pd.concat([df, eval_df])
2247
2244
 
2245
+ df = self.__correct_target(df)
2246
+
2247
+ df = self.__handle_index_search_keys(df, self.fit_search_keys)
2248
+
2249
+ if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
2250
+ self._validate_PSI(df.sort_values(by=maybe_date_column))
2251
+
2248
2252
  if DEFAULT_INDEX in df.columns:
2249
2253
  msg = self.bundle.get("unsupported_index_column")
2250
2254
  self.logger.info(msg)
@@ -3567,6 +3571,34 @@ class FeaturesEnricher(TransformerMixin):
3567
3571
  self.logger.warning(msg)
3568
3572
  print(msg)
3569
3573
 
3574
+ def _validate_PSI(self, df: pd.DataFrame):
3575
+ if EVAL_SET_INDEX in df.columns:
3576
+ train = df.query(f"{EVAL_SET_INDEX} == 0")
3577
+ eval1 = df.query(f"{EVAL_SET_INDEX} == 1")
3578
+ else:
3579
+ train = df
3580
+ eval1 = None
3581
+
3582
+ # 1. Check train PSI
3583
+ half_train = round(len(train) / 2)
3584
+ part1 = train[:half_train]
3585
+ part2 = train[half_train:]
3586
+ train_psi = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
3587
+ if train_psi > 0.2:
3588
+ self.warning_counter.increment()
3589
+ msg = self.bundle.get("train_unstable_target").format(train_psi)
3590
+ print(msg)
3591
+ self.logger.warning(msg)
3592
+
3593
+ # 2. Check train-test PSI
3594
+ if eval1 is not None:
3595
+ train_test_psi = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
3596
+ if train_test_psi > 0.2:
3597
+ self.warning_counter.increment()
3598
+ msg = self.bundle.get("eval_unstable_target").format(train_test_psi)
3599
+ print(msg)
3600
+ self.logger.warning(msg)
3601
+
3570
3602
  def _dump_python_libs(self):
3571
3603
  try:
3572
3604
  from pip._internal.operations.freeze import freeze
@@ -0,0 +1,8 @@
1
+ /**
2
+ * FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
3
+ * Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
4
+ *
5
+ * This software contains code from open-source projects:
6
+ * MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
7
+ */
8
+ var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return 
n(!0,e)}),(function(e){return n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var 
t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return 
Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var 
e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 
10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return 
n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var 
n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="le
aderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly
94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: 
block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocm
VmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 
0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return 
navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var 
e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var 
e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return 
navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return 
e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
@@ -111,7 +111,9 @@ x_is_empty=X is empty
111
111
  y_is_empty=y is empty
112
112
  x_contains_reserved_column_name=Column name {} is reserved. Please rename column and try again
113
113
  missing_generate_feature=\nWARNING: Feature {} specified in `generate_features` is not present in input columns: {}
114
- x_unstable_by_date=\nWARNING: Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample.
114
+ x_unstable_by_date=\nWARNING: Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample
115
+ train_unstable_target=\nWARNING: Your training sample contains an unstable target event, PSI = {}. This will lead to unstable scoring on deferred samples. It is recommended to redesign the training sample
116
+ eval_unstable_target=\nWARNING: Your training and evaluation samples have a difference in target distribution. PSI = {}. The results will be unstable. It is recommended to redesign the training and evaluation samples
115
117
  # eval set validation
116
118
  unsupported_type_eval_set=Unsupported type of eval_set: {}. It should be list of tuples with two elements: X and y
117
119
  eval_set_invalid_tuple_size=eval_set contains a tuple of size {}. It should contain only pairs of X and y
@@ -198,7 +200,7 @@ email_detected=Emails detected in column `{}`. It will be used as a search key\n
198
200
  email_detected_not_registered=Emails detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
199
201
  phone_detected=Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
200
202
  phone_detected_not_registered=\nWARNING: Phone numbers detected in column `{}`. It can be used only with api_key from profile.upgini.com\nSee docs to turn off the automatic detection: https://github.com/upgini/upgini/blob/main/README.md#turn-off-autodetection-for-search-key-columns
201
- target_type_detected=Detected task type: {}\n
203
+ target_type_detected=\nDetected task type: {}\n
202
204
  # all_ok_community_invite=Chat with us in Slack community:
203
205
  all_ok_community_invite=❓ Support request
204
206
  too_small_for_metrics=Your train dataset contains less than 500 rows. For such dataset Upgini will not calculate accuracy metrics. Please increase the number of rows in the training dataset to calculate accuracy metrics
@@ -177,3 +177,21 @@ def balance_undersample(
177
177
 
178
178
  logger.info(f"Shape after rebalance resampling: {resampled_data}")
179
179
  return resampled_data
180
+
181
+
182
def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
    """Return the Population Stability Index (PSI) between two samples.

    Both samples are split into two bins at the midpoint of their combined
    value range. The PSI is ``sum((e - a) * ln(e / a))`` over the per-bin
    shares ``e`` (from ``expected``) and ``a`` (from ``actual``).

    Args:
        expected: Baseline sample of target values.
        actual: Sample of target values compared against the baseline.

    Returns:
        The PSI as a float. It is 0.0 when the combined sample holds a single
        distinct value, because both distributions are then identical.
    """
    # Floor substituted for empty bins so the log-ratio stays finite.
    empty_bin_share = 1e-4

    combined = pd.concat([expected, actual])
    low = combined.min()
    high = combined.max()
    if low == high:
        # A single distinct value would yield duplicate bin edges, which
        # pandas rejects; identical degenerate samples have no shift.
        return 0.0

    # Two bins split at the midpoint of the combined value range.
    bins = [low, (low + high) / 2, high]

    def _bin_shares(sample: pd.Series) -> np.ndarray:
        shares = sample.value_counts(bins=bins, normalize=True).sort_index().values
        # Only exact zeros are replaced, so populated bins keep their share.
        return np.where(shares == 0, empty_bin_share, shares)

    expected_shares = _bin_shares(expected)
    actual_shares = _bin_shares(actual)

    return float(np.sum((expected_shares - actual_shares) * np.log(expected_shares / actual_shares)))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.266a3254.post1
3
+ Version: 1.1.267
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -7,6 +7,7 @@ src/upgini/ads.py
7
7
  src/upgini/dataset.py
8
8
  src/upgini/errors.py
9
9
  src/upgini/features_enricher.py
10
+ src/upgini/fingerprint.js
10
11
  src/upgini/http.py
11
12
  src/upgini/metadata.py
12
13
  src/upgini/metrics.py
@@ -0,0 +1,27 @@
1
+ import pandas as pd
2
+ from upgini.autofe.date import DateDiff, DateDiffType2
3
+
4
+ from datetime import datetime
5
+ from pandas.testing import assert_series_equal
6
+
7
+
8
def test_date_diff():
    """DateDiff on (date2, date1) should give 10531 for row 0 and a missing value for row 1."""
    rows = [
        [datetime(1993, 12, 10), datetime(2022, 10, 10)],
        [datetime(2023, 10, 10), datetime(2022, 10, 10)],
    ]
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    actual = DateDiff().calculate_binary(frame.date2, frame.date1)

    assert_series_equal(actual, pd.Series([10531, None]))
17
+
18
+
19
def test_date_diff_future():
    """DateDiffType2 on (date2, date1) should give 61.0 and 182.0 for the two rows."""
    rows = [
        [datetime(1993, 12, 10), datetime(2022, 10, 10)],
        [datetime(1993, 4, 10), datetime(2022, 10, 10)],
    ]
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    actual = DateDiffType2().calculate_binary(frame.date2, frame.date1)

    assert_series_equal(actual, pd.Series([61.0, 182.0]))
@@ -428,13 +428,13 @@ def test_saved_features_enricher(requests_mock: Mocker):
428
428
  df.drop(columns="SystemRecordId_473310000", inplace=True)
429
429
  train_df = df.head(10000)
430
430
  train_features = train_df.drop(columns="target")
431
- train_target = train_df["target"]
431
+ train_target = train_df["target"].copy()
432
432
  eval1_df = df[10000:11000].reset_index(drop=True)
433
433
  eval1_features = eval1_df.drop(columns="target")
434
- eval1_target = eval1_df["target"].reset_index(drop=True)
434
+ eval1_target = eval1_df["target"].reset_index(drop=True).copy()
435
435
  eval2_df = df[11000:12000]
436
436
  eval2_features = eval2_df.drop(columns="target")
437
- eval2_target = eval2_df["target"]
437
+ eval2_target = eval2_df["target"].copy()
438
438
 
439
439
  enricher = FeaturesEnricher(
440
440
  search_keys={"phone_num": SearchKey.PHONE, "rep_date": SearchKey.DATE},
@@ -482,6 +482,31 @@ def test_saved_features_enricher(requests_mock: Mocker):
482
482
  assert first_feature_info[feature_name_header] == "feature"
483
483
  assert first_feature_info[shap_value_header] == 10.1
484
484
 
485
+ # Check imbalanced target metrics
486
+ random = np.random.RandomState(42)
487
+ train_random_indices = random.choice(train_target.index, size=9000, replace=False)
488
+ train_target.loc[train_random_indices] = 0
489
+
490
+ metrics = enricher.calculate_metrics(
491
+ train_features,
492
+ train_target
493
+ )
494
+ expected_metrics = pd.DataFrame(
495
+ {
496
+ segment_header: [train_segment],
497
+ rows_header: [10000],
498
+ target_mean_header: [0.049],
499
+ enriched_gini: [0.000985],
500
+ }
501
+ )
502
+ print("Expected metrics: ")
503
+ print(expected_metrics)
504
+ print("Actual metrics: ")
505
+ print(metrics)
506
+
507
+ assert metrics is not None
508
+ assert_frame_equal(expected_metrics, metrics, atol=1e-6)
509
+
485
510
 
486
511
  def test_features_enricher_with_demo_key(requests_mock: Mocker):
487
512
  url = "http://fake_url2"
@@ -2498,11 +2523,11 @@ def test_diff_target_dups(requests_mock: Mocker):
2498
2523
  assert len(self.data) == 2
2499
2524
  print(self.data)
2500
2525
  assert self.data.loc[0, "date_0e8763"] == 1672531200000
2501
- assert self.data.loc[0, "feature_2ad562"] == 12
2502
- assert self.data.loc[0, "target"] == 0
2526
+ assert self.data.loc[0, "feature_2ad562"] == 13
2527
+ assert self.data.loc[0, "target"] == 1
2503
2528
  assert self.data.loc[1, "date_0e8763"] == 1672531200000
2504
- assert self.data.loc[1, "feature_2ad562"] == 13
2505
- assert self.data.loc[1, "target"] == 1
2529
+ assert self.data.loc[1, "feature_2ad562"] == 12
2530
+ assert self.data.loc[1, "target"] == 0
2506
2531
  return SearchTask("123", self, rest_client=enricher.rest_client)
2507
2532
 
2508
2533
  Dataset.search = mock_search
@@ -857,23 +857,23 @@ def test_catboost_metric_binary(requests_mock: Mocker):
857
857
  assert metrics_df.loc[0, segment_header] == train_segment
858
858
  assert metrics_df.loc[0, rows_header] == 500
859
859
  assert metrics_df.loc[0, target_mean_header] == 0.51
860
- assert metrics_df.loc[0, baseline_gini] == approx(0.023101)
861
- assert metrics_df.loc[0, enriched_gini] == approx(0.090344)
862
- assert metrics_df.loc[0, uplift] == approx(0.067243)
860
+ assert metrics_df.loc[0, baseline_gini] == approx(0.061408)
861
+ assert metrics_df.loc[0, enriched_gini] == approx(0.071498)
862
+ assert metrics_df.loc[0, uplift] == approx(0.010090)
863
863
 
864
864
  assert metrics_df.loc[1, segment_header] == eval_1_segment
865
865
  assert metrics_df.loc[1, rows_header] == 250
866
866
  assert metrics_df.loc[1, target_mean_header] == 0.452
867
- assert metrics_df.loc[1, baseline_gini] == approx(-0.016188)
868
- assert metrics_df.loc[1, enriched_gini] == approx(0.014947)
869
- assert metrics_df.loc[1, uplift] == approx(0.031135)
867
+ assert metrics_df.loc[1, baseline_gini] == approx(-0.051702)
868
+ assert metrics_df.loc[1, enriched_gini] == approx(0.023668)
869
+ assert metrics_df.loc[1, uplift] == approx(0.075370)
870
870
 
871
871
  assert metrics_df.loc[2, segment_header] == eval_2_segment
872
872
  assert metrics_df.loc[2, rows_header] == 250
873
873
  assert metrics_df.loc[2, target_mean_header] == 0.536
874
- assert metrics_df.loc[2, baseline_gini] == approx(-0.017138)
875
- assert metrics_df.loc[2, enriched_gini] == approx(0.035666)
876
- assert metrics_df.loc[2, uplift] == approx(0.052805)
874
+ assert metrics_df.loc[2, baseline_gini] == approx(0.012674)
875
+ assert metrics_df.loc[2, enriched_gini] == approx(0.022980)
876
+ assert metrics_df.loc[2, uplift] == approx(0.010306)
877
877
 
878
878
 
879
879
  def test_catboost_metric_binary_with_cat_features(requests_mock: Mocker):
@@ -984,23 +984,23 @@ def test_catboost_metric_binary_with_cat_features(requests_mock: Mocker):
984
984
  assert metrics_df.loc[0, segment_header] == train_segment
985
985
  assert metrics_df.loc[0, rows_header] == 500
986
986
  assert metrics_df.loc[0, target_mean_header] == 0.51
987
- assert metrics_df.loc[0, baseline_gini] == approx(0.102928)
988
- assert metrics_df.loc[0, enriched_gini] == approx(0.139437)
989
- assert metrics_df.loc[0, uplift] == approx(0.036508)
987
+ assert metrics_df.loc[0, baseline_gini] == approx(0.027066)
988
+ assert metrics_df.loc[0, enriched_gini] == approx(0.101601)
989
+ assert metrics_df.loc[0, uplift] == approx(0.074535)
990
990
 
991
991
  assert metrics_df.loc[1, segment_header] == eval_1_segment
992
992
  assert metrics_df.loc[1, rows_header] == 250
993
993
  assert metrics_df.loc[1, target_mean_header] == 0.452
994
- assert metrics_df.loc[1, baseline_gini] == approx(-0.074491)
995
- assert metrics_df.loc[1, enriched_gini] == approx(-0.052619)
996
- assert metrics_df.loc[1, uplift] == approx(0.021872)
994
+ assert metrics_df.loc[1, baseline_gini] == approx(-0.078548)
995
+ assert metrics_df.loc[1, enriched_gini] == approx(-0.019663)
996
+ assert metrics_df.loc[1, uplift] == approx(0.058885)
997
997
 
998
998
  assert metrics_df.loc[2, segment_header] == eval_2_segment
999
999
  assert metrics_df.loc[2, rows_header] == 250
1000
1000
  assert metrics_df.loc[2, target_mean_header] == 0.536
1001
- assert metrics_df.loc[2, baseline_gini] == approx(0.022002)
1002
- assert metrics_df.loc[2, enriched_gini] == approx(-0.010950)
1003
- assert metrics_df.loc[2, uplift] == approx(-0.032952)
1001
+ assert metrics_df.loc[2, baseline_gini] == approx(-0.066572)
1002
+ assert metrics_df.loc[2, enriched_gini] == approx(-0.116598)
1003
+ assert metrics_df.loc[2, uplift] == approx(-0.050026)
1004
1004
 
1005
1005
 
1006
1006
  @pytest.mark.skip()
@@ -1225,23 +1225,23 @@ def test_rf_metric_rmse(requests_mock: Mocker):
1225
1225
  assert metrics_df.loc[0, segment_header] == train_segment
1226
1226
  assert metrics_df.loc[0, rows_header] == 500
1227
1227
  assert metrics_df.loc[0, target_mean_header] == 0.51
1228
- assert metrics_df.loc[0, baseline_rmse] == approx(0.737054)
1229
- assert metrics_df.loc[0, enriched_rmse] == approx(0.720624)
1230
- assert metrics_df.loc[0, uplift] == approx(0.016430)
1228
+ assert metrics_df.loc[0, baseline_rmse] == approx(0.695490)
1229
+ assert metrics_df.loc[0, enriched_rmse] == approx(0.656957)
1230
+ assert metrics_df.loc[0, uplift] == approx(0.038533)
1231
1231
 
1232
1232
  assert metrics_df.loc[1, segment_header] == eval_1_segment
1233
1233
  assert metrics_df.loc[1, rows_header] == 250
1234
1234
  assert metrics_df.loc[1, target_mean_header] == 0.452
1235
- assert metrics_df.loc[1, baseline_rmse] == approx(0.704719)
1236
- assert metrics_df.loc[1, enriched_rmse] == approx(0.721444)
1237
- assert metrics_df.loc[1, uplift] == approx(-0.016725)
1235
+ assert metrics_df.loc[1, baseline_rmse] == approx(0.717178)
1236
+ assert metrics_df.loc[1, enriched_rmse] == approx(0.685107)
1237
+ assert metrics_df.loc[1, uplift] == approx(0.032071)
1238
1238
 
1239
1239
  assert metrics_df.loc[2, segment_header] == eval_2_segment
1240
1240
  assert metrics_df.loc[2, rows_header] == 250
1241
1241
  assert metrics_df.loc[2, target_mean_header] == 0.536
1242
- assert metrics_df.loc[2, baseline_rmse] == approx(0.690261)
1243
- assert metrics_df.loc[2, enriched_rmse] == approx(0.694711)
1244
- assert metrics_df.loc[2, uplift] == approx(-0.004450)
1242
+ assert metrics_df.loc[2, baseline_rmse] == approx(0.678079)
1243
+ assert metrics_df.loc[2, enriched_rmse] == approx(0.718205)
1244
+ assert metrics_df.loc[2, uplift] == approx(-0.040126)
1245
1245
 
1246
1246
 
1247
1247
  def test_default_metric_binary_with_string_feature(requests_mock: Mocker):
@@ -1341,23 +1341,23 @@ def test_default_metric_binary_with_string_feature(requests_mock: Mocker):
1341
1341
  assert metrics_df.loc[0, segment_header] == train_segment
1342
1342
  assert metrics_df.loc[0, rows_header] == 500
1343
1343
  assert metrics_df.loc[0, target_mean_header] == 0.51
1344
- assert metrics_df.loc[0, baseline_gini] == approx(0.116841)
1345
- assert metrics_df.loc[0, enriched_gini] == approx(0.076030)
1346
- assert metrics_df.loc[0, uplift] == approx(-0.040811)
1344
+ assert metrics_df.loc[0, baseline_gini] == approx(-0.034968)
1345
+ assert metrics_df.loc[0, enriched_gini] == approx(-0.090683)
1346
+ assert metrics_df.loc[0, uplift] == approx(-0.055715)
1347
1347
 
1348
1348
  assert metrics_df.loc[1, segment_header] == eval_1_segment
1349
1349
  assert metrics_df.loc[1, rows_header] == 250
1350
1350
  assert metrics_df.loc[1, target_mean_header] == 0.452
1351
- assert metrics_df.loc[1, baseline_gini] == approx(-0.078160)
1352
- assert metrics_df.loc[1, enriched_gini] == approx(-0.029288)
1353
- assert metrics_df.loc[1, uplift] == approx(0.048873)
1351
+ assert metrics_df.loc[1, baseline_gini] == approx(-0.081674)
1352
+ assert metrics_df.loc[1, enriched_gini] == approx(-0.006627)
1353
+ assert metrics_df.loc[1, uplift] == approx(0.075047)
1354
1354
 
1355
1355
  assert metrics_df.loc[2, segment_header] == eval_2_segment
1356
1356
  assert metrics_df.loc[2, rows_header] == 250
1357
1357
  assert metrics_df.loc[2, target_mean_header] == 0.536
1358
- assert metrics_df.loc[2, baseline_gini] == approx(-0.013484)
1359
- assert metrics_df.loc[2, enriched_gini] == approx(-0.017486)
1360
- assert metrics_df.loc[2, uplift] == approx(-0.004002)
1358
+ assert metrics_df.loc[2, baseline_gini] == approx(-0.039166)
1359
+ assert metrics_df.loc[2, enriched_gini] == approx(-0.016457)
1360
+ assert metrics_df.loc[2, uplift] == approx(0.022710)
1361
1361
 
1362
1362
 
1363
1363
  def approx(value: float):
@@ -4,7 +4,8 @@ import pytest
4
4
  from pandas.testing import assert_frame_equal
5
5
 
6
6
  from upgini.errors import ValidationError
7
- from upgini.metadata import SYSTEM_RECORD_ID, TARGET, ModelTaskType
7
+ from upgini.features_enricher import FeaturesEnricher
8
+ from upgini.metadata import SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
8
9
  from upgini.resource_bundle import bundle
9
10
  from upgini.utils.target_utils import balance_undersample, define_task
10
11
 
@@ -132,3 +133,62 @@ def test_balance_undersaampling_multiclass():
132
133
  })
133
134
  # Get all of 25% quantile class (b) and minor classes (a) and x2 (or all if less) of major classes
134
135
  assert_frame_equal(balanced_df.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True), expected_df)
136
+
137
+
138
def test_binary_psi_calculation():
    """Check how many warnings ``_validate_PSI`` records for binary targets.

    Four scenarios are run over 20 daily rows: two target sequences that
    differ only at position 15, each with and without an ``eval_set_index``
    column splitting the rows into halves.
    """
    first_target = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1]
    second_target = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]
    halves = [0] * 10 + [1] * 10

    def warnings_for(columns):
        # Build the frame, attach the date key and run the validation on a fresh enricher.
        frame = pd.DataFrame(columns)
        frame["date"] = pd.date_range("2020-01-01", "2020-01-20")
        enricher = FeaturesEnricher(search_keys={"date": SearchKey.DATE})
        enricher._validate_PSI(frame)
        return enricher.warning_counter

    assert not warnings_for({"target": first_target}).has_warnings()

    assert warnings_for({"target": second_target})._count == 1

    assert warnings_for({"target": first_target, "eval_set_index": halves})._count == 1

    assert warnings_for({"target": second_target, "eval_set_index": halves})._count == 2
172
+
173
+
174
def test_regression_psi_calculation():
    """Check how many warnings ``_validate_PSI`` records for continuous targets.

    The first scenario uses 20 random values and expects one warning. The
    second repeats a 10-value sample twice, with the copy's first and last
    entries forced to 0.0 and 1.0, and expects no warnings.
    """
    rng = np.random.RandomState(42)

    def warnings_for(target):
        # Build the frame, attach the date key and run the validation on a fresh enricher.
        frame = pd.DataFrame({"target": target})
        frame["date"] = pd.date_range("2020-01-01", "2020-01-20")
        enricher = FeaturesEnricher(search_keys={"date": SearchKey.DATE})
        enricher._validate_PSI(frame)
        return enricher.warning_counter

    # Draw order matters: the 20-value sample is taken before the 10-value one.
    assert warnings_for(rng.rand(20))._count == 1

    first_half = rng.rand(10)
    second_half = first_half.copy()
    second_half[0] = 0.0
    second_half[9] = 1.0
    assert not warnings_for(list(first_half) + list(second_half)).has_warnings()
@@ -1,109 +0,0 @@
1
- import abc
2
- from typing import Any, List, Optional, Union
3
- import numpy as np
4
- import pandas as pd
5
- from pydantic import BaseModel
6
-
7
- from upgini.autofe.operand import PandasOperand
8
-
9
-
10
class DateDiffMixin(BaseModel):
    """Common fields and date parsing shared by the date-difference operands."""

    # Unit code the subclasses pass to ``np.timedelta64(1, ...)`` when scaling a difference.
    diff_unit: str = "D"
    # ``unit`` argument forwarded to ``pd.to_datetime`` for the left / right operand.
    left_unit: Optional[str] = None
    right_unit: Optional[str] = None

    def _convert_to_date(
        self, x: Union[pd.DataFrame, pd.Series], unit: Optional[str]
    ) -> Union[pd.DataFrame, pd.Series]:
        """Parse ``x`` with ``pd.to_datetime``.

        A Series is converted directly; a DataFrame is converted row by row
        (``axis=1``) through this same method.
        """
        if not isinstance(x, pd.DataFrame):
            return pd.to_datetime(x, unit=unit)

        def convert_row(row):
            return self._convert_to_date(row, unit)

        return x.apply(convert_row, axis=1)
22
-
23
-
24
class DateDiff(PandasOperand, DateDiffMixin):
    """Binary operand returning ``left - right`` expressed in ``diff_unit`` units.

    Negative differences are replaced by missing values.
    """

    name = "date_diff"
    is_binary = True
    has_symmetry_importance = True

    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
        """Parse both operands as dates and return their non-negative difference."""
        left_dates = self._convert_to_date(left, self.left_unit)
        right_dates = self._convert_to_date(right, self.right_unit)
        delta = left_dates - right_dates
        return self.__replace_negative(delta / np.timedelta64(1, self.diff_unit))

    def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
        """Overwrite every negative entry of ``x`` with ``None`` in place and return ``x``."""
        negative = x < 0
        x[negative] = None
        return x
37
-
38
-
39
class DateDiffType2(PandasOperand, DateDiffMixin):
    """Binary operand measuring the gap from ``left`` to the next yearly recurrence of ``right``.

    ``right`` is moved by whole years into the calendar year of ``left``; when
    that date falls before ``left`` it is pushed one further year ahead. The
    result is ``(shifted right - left)`` expressed in ``diff_unit`` units.
    """

    name = "date_diff_type2"
    is_binary = True
    has_symmetry_importance = True

    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
        """Return the time from each ``left`` date to the upcoming anniversary of ``right``."""
        left_dates = self._convert_to_date(left, self.left_unit)
        right_dates = self._convert_to_date(right, self.right_unit)

        def year_shift(years):
            # A missing year gap (either date missing) must stay missing after the shift.
            if np.isnan(years):
                return np.datetime64("NaT")
            return pd.tseries.offsets.DateOffset(years=years)

        year_gap = left_dates.dt.year - right_dates.dt.year
        upcoming = right_dates + year_gap.apply(year_shift)

        # Anniversaries that already passed this year roll over to the next one.
        already_passed = upcoming < left_dates
        upcoming[already_passed] = upcoming[already_passed] + pd.tseries.offsets.DateOffset(years=1)

        return (upcoming - left_dates) / np.timedelta64(1, self.diff_unit)
55
-
56
-
57
class DateListDiff(PandasOperand, DateDiffMixin):
    """Binary operand aggregating the differences between a date and a list of dates.

    For every row, ``left`` (a single date) is compared with each date in
    ``right`` (a list); the strictly positive differences, expressed in
    ``diff_unit`` units, are collapsed with the pandas aggregation named by
    ``aggregation``.
    """

    is_binary = True
    has_symmetry_importance = True
    # Name of the pandas aggregation applied to the per-row differences (e.g. "min", "count").
    aggregation: str

    def __init__(self, **data: Any) -> None:
        # Derive a default operand name from the aggregation when none is given.
        if "name" not in data:
            data["name"] = f"date_diff_{data.get('aggregation')}"
        super().__init__(**data)

    def map_diff(self, left: np.datetime64, right: list) -> pd.Series:
        """Return ``left - d`` for every date ``d`` in ``right``, in ``diff_unit`` units."""
        right_dates = self._convert_to_date(pd.Series(right), self.right_unit)
        return (left - right_dates) / np.timedelta64(1, self.diff_unit)

    def reduce(self, diff_list: pd.Series) -> float:
        """Aggregate the strictly positive entries of ``diff_list``."""
        return diff_list[diff_list > 0].aggregate(self.aggregation)

    def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
        """Compute the aggregated difference for every row.

        Rows are paired by index label, and the result carries the index of
        ``left`` so it stays aligned with the inputs (previously the result
        was always re-labelled 0..n-1, misaligning non-default indexes).
        """
        left = self._convert_to_date(left, self.left_unit)

        reduced = [self.reduce(self.map_diff(left.loc[i], right.loc[i])) for i in left.index]
        return pd.Series(reduced, index=left.index)
77
-
78
-
79
class DateListDiffBounded(DateListDiff):
    """``DateListDiff`` variant that only aggregates differences inside a range.

    Only strictly positive differences lying between ``lower_bound`` and
    ``upper_bound`` are aggregated. A bound of ``None`` means "unbounded" on
    that side; ``inclusive`` is forwarded to ``Series.between`` and falls
    back to ``"left"`` when unset.
    """

    lower_bound: Optional[int]
    upper_bound: Optional[int]
    inclusive: Optional[str]

    def __init__(self, **data: Any) -> None:
        # Build a default name such as "date_diff_Y_0_18_count" when none is given.
        if "name" not in data:
            inclusive = data.get("inclusive")
            lower_bound = data.get("lower_bound")
            upper_bound = data.get("upper_bound")
            components = [
                "date_diff",
                # Mirror the "D" default of the diff_unit field: an omitted
                # diff_unit must not put None into the join below.
                data.get("diff_unit", "D"),
                str(lower_bound if lower_bound is not None else "minusinf"),
                str(upper_bound if upper_bound is not None else "plusinf"),
            ]
            if inclusive:
                components.append(inclusive)
            components.append(data.get("aggregation"))
            data["name"] = "_".join(components)
        super().__init__(**data)

    def reduce(self, diff_list: pd.Series) -> float:
        """Aggregate the strictly positive entries of ``diff_list`` that fall within the bounds."""
        # Compare against None explicitly: a bound of 0 is a real bound, not "unbounded".
        lower = -np.inf if self.lower_bound is None else self.lower_bound
        upper = np.inf if self.upper_bound is None else self.upper_bound
        in_bounds = diff_list.between(lower, upper, inclusive=self.inclusive or "left")
        return diff_list[(diff_list > 0) & in_bounds].aggregate(self.aggregation)
@@ -1,93 +0,0 @@
1
- import pandas as pd
2
- from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
3
-
4
- from datetime import datetime
5
- from pandas.testing import assert_series_equal
6
-
7
-
8
def test_date_diff():
    """DateDiff returns the day gap and a missing value where the gap is negative."""
    rows = [
        ["2022-10-10", pd.to_datetime("1993-12-10").timestamp()],
        ["2022-10-10", pd.to_datetime("2023-10-10").timestamp()],
    ]
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    # date2 holds epoch seconds, hence right_unit="s".
    actual = DateDiff(right_unit="s").calculate_binary(frame.date1, frame.date2)

    # Second row: date2 lies after date1, so the difference is negative -> missing.
    assert_series_equal(actual, pd.Series([10531, None]))
20
-
21
-
22
def test_date_diff_type2():
    """DateDiffType2 returns the days from date1 to the next anniversary of date2."""
    reference_ts = pd.to_datetime("2022-10-10").timestamp()
    rows = [
        [pd.to_datetime("2022-10-10").timestamp(), datetime(1993, 12, 10)],
        [pd.to_datetime("2022-10-10").timestamp(), datetime(1993, 4, 10)],
    ]
    assert rows[0][0] == reference_ts  # both rows share the same epoch-seconds date1
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    # date1 holds epoch seconds, hence left_unit="s".
    actual = DateDiffType2(left_unit="s").calculate_binary(frame.date1, frame.date2)

    # 10 Dec is still ahead in 2022 (61 days); 10 Apr rolls over to 2023 (182 days).
    assert_series_equal(actual, pd.Series([61.0, 182.0]))
34
-
35
-
36
def test_date_diff_list():
    """DateListDiff derives its name from the aggregation and aggregates positive gaps only."""
    rows = [
        ["2022-10-10", ["1993-12-10", "1993-12-11"]],
        ["2022-10-10", ["1993-12-10", "1993-12-10"]],
        ["2022-10-10", ["2023-10-10"]],
        ["2022-10-10", []],
    ]
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    # (aggregation, expected operand name, expected per-row values)
    cases = [
        ("min", "date_diff_min", [10530, 10531, None, None]),
        ("max", "date_diff_max", [10531, 10531, None, None]),
        ("mean", "date_diff_mean", [10530.5, 10531, None, None]),
        ("nunique", "date_diff_nunique", [2, 1, 0, 0]),
    ]
    for aggregation, expected_name, expected_values in cases:
        operand = DateListDiff(aggregation=aggregation)
        assert operand.name == expected_name
        assert_series_equal(
            operand.calculate_binary(frame.date1, frame.date2),
            pd.Series(expected_values),
        )
56
-
57
-
58
def test_date_diff_list_bounded():
    """DateListDiffBounded counts, per row, the year gaps that fall into each age bucket."""
    long_history = [
        "2013-12-10",
        "2003-12-11",
        "1999-12-11",
        "1993-12-11",
        "1983-12-11",
        "1973-12-11",
        "1959-12-11",
    ]
    rows = [
        ["2022-10-10", ["2013-12-10", "2013-12-11", "1999-12-11"]],
        ["2022-10-10", long_history],
        ["2022-10-10", ["2003-12-10", "2003-12-10"]],
        ["2022-10-10", ["2023-10-10", "1993-12-10"]],
        ["2022-10-10", []],
    ]
    frame = pd.DataFrame(rows, columns=["date1", "date2"])

    # (lower bound, upper bound, expected operand name, expected per-row counts)
    cases = [
        (0, 18, "date_diff_Y_0_18_count", [2, 1, 0, 0, 0]),
        (18, 23, "date_diff_Y_18_23_count", [1, 2, 2, 0, 0]),
        (23, 30, "date_diff_Y_23_30_count", [0, 1, 0, 1, 0]),
        (30, 45, "date_diff_Y_30_45_count", [0, 1, 0, 0, 0]),
        (45, 60, "date_diff_Y_45_60_count", [0, 1, 0, 0, 0]),
        (60, None, "date_diff_Y_60_plusinf_count", [0, 1, 0, 0, 0]),
    ]
    for lower_bound, upper_bound, expected_name, expected_counts in cases:
        operand = DateListDiffBounded(
            diff_unit="Y", aggregation="count", lower_bound=lower_bound, upper_bound=upper_bound
        )
        assert operand.name == expected_name
        assert_series_equal(
            operand.calculate_binary(frame.date1, frame.date2),
            pd.Series(expected_counts),
        )
File without changes
File without changes
File without changes