upgini 1.1.260__tar.gz → 1.1.261a3233.post5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.1.260/src/upgini.egg-info → upgini-1.1.261a3233.post5}/PKG-INFO +1 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/setup.py +1 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/data_source/data_source_publisher.py +10 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/dataset.py +21 -58
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/features_enricher.py +1 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/search_task.py +1 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/datetime_utils.py +1 -1
- upgini-1.1.261a3233.post5/src/upgini/utils/target_utils.py +183 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini.egg-info/SOURCES.txt +0 -1
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_etalon_validation.py +5 -3
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_features_enricher.py +1 -0
- upgini-1.1.261a3233.post5/tests/test_target_utils.py +134 -0
- upgini-1.1.260/src/upgini/fingerprint.js +0 -8
- upgini-1.1.260/src/upgini/utils/target_utils.py +0 -74
- upgini-1.1.260/tests/test_target_utils.py +0 -74
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/LICENSE +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/README.md +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/pyproject.toml +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/setup.cfg +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/ads.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/errors.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/http.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/metadata.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/metrics.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/spinner.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_country_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_email_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_metrics.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.260 → upgini-1.1.261a3233.post5}/tests/test_widget.py +0 -0
{upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
@@ -40,7 +40,7 @@ class DataSourcePublisher:
|
|
|
40
40
|
if logs_enabled:
|
|
41
41
|
self.logger = LoggerFactory().get_logger(endpoint, api_key)
|
|
42
42
|
else:
|
|
43
|
-
self.logger = logging.getLogger()
|
|
43
|
+
self.logger = logging.getLogger("muted_logger")
|
|
44
44
|
self.logger.setLevel("FATAL")
|
|
45
45
|
|
|
46
46
|
def place(
|
|
@@ -170,6 +170,7 @@ class DataSourcePublisher:
|
|
|
170
170
|
print(msg)
|
|
171
171
|
self.logger.info(msg)
|
|
172
172
|
self._rest_client.stop_ads_management_task(task_id, trace_id)
|
|
173
|
+
raise
|
|
173
174
|
except Exception:
|
|
174
175
|
self.logger.exception("Failed to register data table")
|
|
175
176
|
raise
|
|
@@ -289,6 +290,7 @@ class DataSourcePublisher:
|
|
|
289
290
|
raise ValidationError("One of arguments: bq_table_id or search_keys should be presented")
|
|
290
291
|
if bq_table_id is not None and search_keys is not None:
|
|
291
292
|
raise ValidationError("Only one argument could be presented: bq_table_id or search_keys")
|
|
293
|
+
task_id = None
|
|
292
294
|
try:
|
|
293
295
|
search_keys = [k.value.value for k in search_keys] if search_keys else None
|
|
294
296
|
request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
|
|
@@ -303,6 +305,13 @@ class DataSourcePublisher:
|
|
|
303
305
|
raise Exception("Failed to register ADS: " + status_response["errorMessage"])
|
|
304
306
|
|
|
305
307
|
print("Uploading successfully finished")
|
|
308
|
+
except KeyboardInterrupt:
|
|
309
|
+
if task_id is not None:
|
|
310
|
+
msg = f"Stopping AdsManagementTask {task_id}"
|
|
311
|
+
print(msg)
|
|
312
|
+
self.logger.info(msg)
|
|
313
|
+
self._rest_client.stop_ads_management_task(task_id, trace_id)
|
|
314
|
+
raise
|
|
306
315
|
except Exception:
|
|
307
316
|
self.logger.exception(f"Failed to upload table {bq_table_id}")
|
|
308
317
|
raise
|
|
@@ -39,10 +39,10 @@ from upgini.metadata import (
|
|
|
39
39
|
)
|
|
40
40
|
from upgini.normalizer.phone_normalizer import PhoneNormalizer
|
|
41
41
|
from upgini.resource_bundle import ResourceBundle, get_custom_bundle
|
|
42
|
-
from upgini.sampler.random_under_sampler import RandomUnderSampler
|
|
43
42
|
from upgini.search_task import SearchTask
|
|
44
43
|
from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
|
|
45
44
|
from upgini.utils.email_utils import EmailSearchKeyConverter
|
|
45
|
+
from upgini.utils.target_utils import balance_undersample
|
|
46
46
|
|
|
47
47
|
try:
|
|
48
48
|
from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
|
|
@@ -60,7 +60,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
60
60
|
FIT_SAMPLE_WITH_EVAL_SET_ROWS = 200_000
|
|
61
61
|
FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
|
|
62
62
|
MIN_SAMPLE_THRESHOLD = 5_000
|
|
63
|
-
IMBALANCE_THESHOLD = 0.
|
|
63
|
+
IMBALANCE_THESHOLD = 0.6
|
|
64
|
+
BINARY_BOOTSTRAP_LOOPS = 5
|
|
65
|
+
MULTICLASS_BOOTSTRAP_LOOPS = 2
|
|
64
66
|
MIN_TARGET_CLASS_ROWS = 100
|
|
65
67
|
MAX_MULTICLASS_CLASS_COUNT = 100
|
|
66
68
|
MIN_SUPPORTED_DATE_TS = 946684800000 # 2000-01-01
|
|
@@ -460,10 +462,8 @@ class Dataset: # (pd.DataFrame):
|
|
|
460
462
|
self.task_type == ModelTaskType.BINARY and len(train_segment) > self.MIN_SAMPLE_THRESHOLD
|
|
461
463
|
):
|
|
462
464
|
count = len(train_segment)
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, "")
|
|
466
|
-
target = train_segment[target_column].copy()
|
|
465
|
+
target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, TARGET)
|
|
466
|
+
target = train_segment[target_column]
|
|
467
467
|
target_classes_count = target.nunique()
|
|
468
468
|
|
|
469
469
|
if target_classes_count > self.MAX_MULTICLASS_CLASS_COUNT:
|
|
@@ -473,12 +473,9 @@ class Dataset: # (pd.DataFrame):
|
|
|
473
473
|
self.logger.warning(msg)
|
|
474
474
|
raise ValidationError(msg)
|
|
475
475
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
if current_class_count < min_class_count:
|
|
480
|
-
min_class_count = current_class_count
|
|
481
|
-
min_class_value = v
|
|
476
|
+
vc = target.value_counts()
|
|
477
|
+
min_class_value = vc.index[len(vc) - 1]
|
|
478
|
+
min_class_count = vc[min_class_value]
|
|
482
479
|
|
|
483
480
|
if min_class_count < self.MIN_TARGET_CLASS_ROWS:
|
|
484
481
|
msg = self.bundle.get("dataset_rarest_class_less_min").format(
|
|
@@ -491,53 +488,19 @@ class Dataset: # (pd.DataFrame):
|
|
|
491
488
|
min_class_threshold = min_class_percent * count
|
|
492
489
|
|
|
493
490
|
if min_class_count < min_class_threshold:
|
|
494
|
-
msg = self.bundle.get("dataset_rarest_class_less_threshold").format(
|
|
495
|
-
min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
|
|
496
|
-
)
|
|
497
|
-
self.logger.warning(msg)
|
|
498
|
-
print(msg)
|
|
499
|
-
self.warning_counter.increment()
|
|
500
|
-
|
|
501
|
-
train_segment = train_segment.copy().sort_values(by=SYSTEM_RECORD_ID)
|
|
502
|
-
if self.task_type == ModelTaskType.MULTICLASS:
|
|
503
|
-
# Sort classes by rows count and find 25% quantile class
|
|
504
|
-
classes = target.value_counts().index
|
|
505
|
-
quantile25_idx = int(0.75 * len(classes))
|
|
506
|
-
quantile25_class = classes[quantile25_idx]
|
|
507
|
-
count_of_quantile25_class = len(target[target == quantile25_class])
|
|
508
|
-
msg = self.bundle.get("imbalance_multiclass").format(quantile25_class, count_of_quantile25_class)
|
|
509
|
-
self.logger.warning(msg)
|
|
510
|
-
print(msg)
|
|
511
|
-
# 25% and lower classes will stay as is. Higher classes will be downsampled
|
|
512
|
-
parts = []
|
|
513
|
-
for class_idx in range(quantile25_idx):
|
|
514
|
-
sampled = train_segment[train_segment[target_column] == classes[class_idx]].sample(
|
|
515
|
-
n=count_of_quantile25_class, random_state=self.random_state
|
|
516
|
-
)
|
|
517
|
-
parts.append(sampled)
|
|
518
|
-
for class_idx in range(quantile25_idx, len(classes)):
|
|
519
|
-
parts.append(train_segment[train_segment[target_column] == classes[class_idx]])
|
|
520
|
-
resampled_data = pd.concat(parts)
|
|
521
|
-
elif self.task_type == ModelTaskType.BINARY and min_class_count < self.MIN_SAMPLE_THRESHOLD / 2:
|
|
522
|
-
minority_class = train_segment[train_segment[target_column] == min_class_value]
|
|
523
|
-
majority_class = train_segment[train_segment[target_column] != min_class_value]
|
|
524
|
-
sampled_majority_class = majority_class.sample(
|
|
525
|
-
n=self.MIN_SAMPLE_THRESHOLD - min_class_count, random_state=self.random_state
|
|
526
|
-
)
|
|
527
|
-
resampled_data = train_segment[
|
|
528
|
-
(train_segment[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
|
|
529
|
-
| (train_segment[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
|
|
530
|
-
]
|
|
531
|
-
else:
|
|
532
|
-
sampler = RandomUnderSampler(random_state=self.random_state)
|
|
533
|
-
X = train_segment[SYSTEM_RECORD_ID]
|
|
534
|
-
X = X.to_frame(SYSTEM_RECORD_ID)
|
|
535
|
-
new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
536
|
-
resampled_data = train_segment[train_segment[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
|
|
537
|
-
|
|
538
|
-
self.data = resampled_data
|
|
539
|
-
self.logger.info(f"Shape after rebalance resampling: {self.data.shape}")
|
|
540
491
|
self.imbalanced = True
|
|
492
|
+
self.data = balance_undersample(
|
|
493
|
+
df=train_segment,
|
|
494
|
+
target_column=target_column,
|
|
495
|
+
task_type=self.task_type,
|
|
496
|
+
random_state=self.random_state,
|
|
497
|
+
imbalance_threshold=self.IMBALANCE_THESHOLD,
|
|
498
|
+
binary_bootstrap_loops=self.BINARY_BOOTSTRAP_LOOPS,
|
|
499
|
+
multiclass_bootstrap_loops=self.MULTICLASS_BOOTSTRAP_LOOPS,
|
|
500
|
+
logger=self.logger,
|
|
501
|
+
bundle=self.bundle,
|
|
502
|
+
warning_counter=self.warning_counter,
|
|
503
|
+
)
|
|
541
504
|
|
|
542
505
|
# Resample over fit threshold
|
|
543
506
|
if not self.imbalanced and EVAL_SET_INDEX in self.data.columns:
|
|
@@ -220,7 +220,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
220
220
|
if logs_enabled:
|
|
221
221
|
self.logger = LoggerFactory().get_logger(endpoint, self._api_key, client_ip, client_visitorid)
|
|
222
222
|
else:
|
|
223
|
-
self.logger = logging.getLogger()
|
|
223
|
+
self.logger = logging.getLogger("muted_logger")
|
|
224
224
|
self.logger.setLevel("FATAL")
|
|
225
225
|
|
|
226
226
|
if len(kwargs) > 0:
|
|
@@ -57,7 +57,7 @@ class SearchTask:
|
|
|
57
57
|
if logger is not None:
|
|
58
58
|
self.logger = logger
|
|
59
59
|
else:
|
|
60
|
-
self.logger = logging.getLogger()
|
|
60
|
+
self.logger = logging.getLogger("muted_logger")
|
|
61
61
|
self.logger.setLevel("FATAL")
|
|
62
62
|
self.provider_metadata_v2: Optional[List[ProviderTaskMetadataV2]] = None
|
|
63
63
|
self.unused_features_for_generation: Optional[List[str]] = None
|
|
@@ -44,7 +44,7 @@ class DateTimeSearchKeyConverter:
|
|
|
44
44
|
if logger is not None:
|
|
45
45
|
self.logger = logger
|
|
46
46
|
else:
|
|
47
|
-
self.logger = logging.getLogger()
|
|
47
|
+
self.logger = logging.getLogger("muted_logger")
|
|
48
48
|
self.logger.setLevel("FATAL")
|
|
49
49
|
self.generated_features: List[str] = []
|
|
50
50
|
self.bundle = bundle or get_custom_bundle()
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional, Union
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pandas.api.types import is_numeric_dtype
|
|
7
|
+
|
|
8
|
+
from upgini.errors import ValidationError
|
|
9
|
+
from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
|
|
10
|
+
from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
|
|
11
|
+
from upgini.sampler.random_under_sampler import RandomUnderSampler
|
|
12
|
+
from upgini.utils.warning_counter import WarningCounter
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
    """Encode target labels as integer category codes.

    Every value is first converted to ``str`` and then to a pandas
    categorical; the categorical codes are returned.

    Args:
        y: Target values. A ``pd.Series`` is encoded as-is; anything else
            (``np.ndarray``, list, tuple, ...) is coerced with ``np.asarray``.

    Returns:
        A ``pd.Series`` of codes (same index as the input) when ``y`` is a
        ``pd.Series``; otherwise a ``np.ndarray`` of codes.
    """
    if isinstance(y, pd.Series):
        return y.astype(str).astype("category").cat.codes
    # Previously any input that was neither a Series nor an ndarray fell
    # through and returned None; coerce it to an array and encode it instead.
    return pd.Series(np.asarray(y)).astype(str).astype("category").cat.codes.values
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def define_task(
    y: pd.Series, has_date: bool = False, logger: Optional[logging.Logger] = None, silent: bool = False
) -> ModelTaskType:
    """Detect the modelling task type from the target values.

    Missing values are dropped first; for numeric targets non-finite values
    are dropped as well, for other targets empty strings are dropped.

    Args:
        y: Target values.
        has_date: When True, a numeric target that is not a compact integer
            encoding is treated as regression without further checks.
        logger: Logger for the detected task; the root logger when omitted.
        silent: When True, the detected task is not printed to stdout.

    Returns:
        The detected ``ModelTaskType``.

    Raises:
        ValidationError: If no usable target values remain, or the target
            has a single unique value.
    """
    logger = logging.getLogger() if logger is None else logger

    target = y.dropna()
    if is_numeric_dtype(target):
        target = target.loc[np.isfinite(target)]
    else:
        target = target.loc[target != ""]
    if len(target) == 0:
        raise ValidationError(bundle.get("empty_target"))

    target_items = target.nunique()
    if target_items == 1:
        raise ValidationError(bundle.get("dataset_constant_target"))

    if target_items == 2:
        task = ModelTaskType.BINARY
    else:
        try:
            target = pd.to_numeric(target)
        except Exception:
            numeric = False
        else:
            numeric = True

        if not numeric:
            # If any value is non numeric - multiclass
            task = ModelTaskType.MULTICLASS
        elif target.nunique() <= 50 and is_int_encoding(target.unique()):
            # Up to 50 labels forming 0..n-1 or 1..n
            task = ModelTaskType.MULTICLASS
        elif has_date:
            task = ModelTaskType.REGRESSION
        else:
            nonzero = target[target != 0]
            distinct_nonzero = nonzero.nunique()
            distinct_ratio = distinct_nonzero / len(nonzero)
            # any non integer value in a float target
            has_fraction = target.dtype.kind == "f" and np.any(target != target.astype(int))
            if has_fraction or distinct_nonzero > 50 or distinct_ratio > 0.2:
                task = ModelTaskType.REGRESSION
            else:
                task = ModelTaskType.MULTICLASS

    logger.info(f"Detected task type: {task}")
    if not silent:
        print(bundle.get("target_type_detected").format(task))
    return task
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def is_int_encoding(unique_values):
    """Tell whether the values form a compact integer label encoding.

    Args:
        unique_values: Sized iterable of label values.

    Returns:
        True when the set of values equals ``{0, ..., n - 1}`` or
        ``{1, ..., n}``, where ``n`` is ``len(unique_values)``.
    """
    # Build the set once instead of once per comparison.
    values = set(unique_values)
    size = len(unique_values)
    return values == set(range(size)) or values == set(range(1, size + 1))
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _report_imbalance(msg: str, logger: logging.Logger, warning_counter: Optional[WarningCounter]) -> None:
    """Send an imbalance warning to the log and stdout and bump the warning counter, if one is given."""
    logger.warning(msg)
    print(msg)
    if warning_counter:
        warning_counter.increment()


def _undersample_by_strategy(
    df: pd.DataFrame, target: pd.Series, sampling_strategy: dict, random_state: int
) -> pd.DataFrame:
    """Run RandomUnderSampler over the record ids of ``df`` and return the rows whose ids were kept."""
    sampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=random_state)
    record_ids = df[SYSTEM_RECORD_ID].to_frame(SYSTEM_RECORD_ID)
    kept_ids, _ = sampler.fit_resample(record_ids, target)  # type: ignore
    return df[df[SYSTEM_RECORD_ID].isin(kept_ids[SYSTEM_RECORD_ID])]


def balance_undersample(
    df: pd.DataFrame,
    target_column: str,
    task_type: ModelTaskType,
    random_state: int,
    imbalance_threshold: float = 0.2,
    min_sample_threshold: int = 5000,
    binary_bootstrap_loops: int = 5,
    multiclass_bootstrap_loops: int = 2,
    logger: Optional[logging.Logger] = None,
    bundle: Optional[ResourceBundle] = None,
    warning_counter: Optional[WarningCounter] = None,
) -> pd.DataFrame:
    """Undersample over-represented target classes.

    Three cases are handled, in this order:

    * ``task_type`` is MULTICLASS and the largest class has more than
      ``multiclass_bootstrap_loops`` times the rows of the reference class
      (the class at position ``int(0.75 * n_classes) - 1`` when classes are
      ordered by descending row count): every class larger than the reference
      class is cut to at most ``multiclass_bootstrap_loops`` times its size.
    * otherwise, the frame has more than ``min_sample_threshold`` rows and the
      rarest class has fewer than ``min_sample_threshold / 2`` rows: all rows
      of the rarest class are kept and the rest is sampled to fill the result
      up to ``min_sample_threshold`` rows.
    * otherwise, the largest class has more than ``binary_bootstrap_loops``
      times the rows of the rarest class: the largest class is cut to
      ``binary_bootstrap_loops`` times the rarest class size.

    When none of the cases applies, the input frame itself is returned.

    Args:
        df: Data to balance; must contain the SYSTEM_RECORD_ID column.
        target_column: Name of the target column in ``df``.
        task_type: Task type of the target.
        random_state: Seed for the sampling.
        imbalance_threshold: Fraction used only to compute the threshold
            figures reported in the warning message.
        min_sample_threshold: Row count the fill-up case samples up to.
        binary_bootstrap_loops: Allowed largest/rarest class ratio in the
            last case.
        multiclass_bootstrap_loops: Allowed largest/reference class ratio in
            the multiclass case.
        logger: Logger for warnings; a muted logger is used when omitted.
        bundle: Message bundle; the default custom bundle when omitted.
        warning_counter: Counter incremented for every warning issued.

    Returns:
        The resampled rows (ordered by SYSTEM_RECORD_ID), or ``df`` unchanged.

    Raises:
        Exception: If ``df`` has no SYSTEM_RECORD_ID column.
    """
    if logger is None:
        logger = logging.getLogger("muted_logger")
        logger.setLevel("FATAL")
    bundle = bundle or get_custom_bundle()
    if SYSTEM_RECORD_ID not in df.columns:
        raise Exception("System record id must be presented for undersampling")

    count = len(df)
    target_classes_count = df[target_column].nunique()

    # Classes ordered by descending row count; computed on the input order so
    # that the choice among equally sized classes does not depend on the sort below.
    vc = df[target_column].value_counts()
    max_class_value = vc.index[0]
    min_class_value = vc.index[len(vc) - 1]
    max_class_count = vc[max_class_value]
    min_class_count = vc[min_class_value]

    min_class_percent = imbalance_threshold / target_classes_count
    min_class_threshold = min_class_percent * count

    resampled_data = df
    # Sort a copy so sampling always sees the same row order for the same data.
    sorted_df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
    # Take the labels from the sorted frame so they stay row-aligned with the
    # record ids handed to the sampler (presumably paired positionally — TODO confirm).
    target = sorted_df[target_column]

    if task_type == ModelTaskType.MULTICLASS:
        # Sort classes by rows count and find 25% quantile class
        classes = vc.index
        quantile25_idx = int(0.75 * len(classes)) - 1
        quantile25_class = classes[quantile25_idx]
        quantile25_class_cnt = vc[quantile25_class]
        max_allowed = quantile25_class_cnt * multiclass_bootstrap_loops

        if max_class_count > max_allowed:
            _report_imbalance(
                bundle.get("imbalance_multiclass").format(quantile25_class, quantile25_class_cnt),
                logger,
                warning_counter,
            )

            # 25% and lower classes will stay as is. Higher classes will be downsampled
            sample_strategy = {
                class_value: min(vc[class_value], max_allowed) for class_value in classes[:quantile25_idx]
            } if quantile25_idx > 0 else dict()
            resampled_data = _undersample_by_strategy(sorted_df, target, sample_strategy, random_state)
    elif len(df) > min_sample_threshold and min_class_count < min_sample_threshold / 2:
        _report_imbalance(
            bundle.get("dataset_rarest_class_less_threshold").format(
                min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
            ),
            logger,
            warning_counter,
        )

        # fill up to min_sample_threshold by majority class
        minority_class = sorted_df[sorted_df[target_column] == min_class_value]
        majority_class = sorted_df[sorted_df[target_column] != min_class_value]
        sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
        sampled_majority_class = majority_class.sample(n=sample_size, random_state=random_state)
        resampled_data = sorted_df[
            (sorted_df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
            | (sorted_df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
        ]
    elif max_class_count > min_class_count * binary_bootstrap_loops:
        _report_imbalance(
            bundle.get("dataset_rarest_class_less_threshold").format(
                min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
            ),
            logger,
            warning_counter,
        )

        resampled_data = _undersample_by_strategy(
            sorted_df, target, {max_class_value: binary_bootstrap_loops * min_class_count}, random_state
        )

    # Log the shape only; interpolating the frame itself dumped its contents into the log.
    logger.info(f"Shape after rebalance resampling: {resampled_data.shape}")
    return resampled_data
|
|
@@ -260,11 +260,13 @@ def test_imbalanced_target():
|
|
|
260
260
|
}
|
|
261
261
|
dataset.task_type = ModelTaskType.MULTICLASS
|
|
262
262
|
dataset._Dataset__resample()
|
|
263
|
-
assert len(dataset) ==
|
|
263
|
+
assert len(dataset) == 1800
|
|
264
264
|
value_counts = dataset.data["target"].value_counts()
|
|
265
265
|
assert len(value_counts) == 4
|
|
266
|
-
|
|
267
|
-
|
|
266
|
+
assert value_counts["a"] == 100
|
|
267
|
+
assert value_counts["b"] == 400
|
|
268
|
+
assert value_counts["c"] == 500
|
|
269
|
+
assert value_counts["d"] == 800
|
|
268
270
|
|
|
269
271
|
|
|
270
272
|
def test_fail_on_small_class_observations():
|
|
@@ -2163,6 +2163,7 @@ def test_idempotent_order_with_imbalanced_dataset(requests_mock: Mocker):
|
|
|
2163
2163
|
pass
|
|
2164
2164
|
|
|
2165
2165
|
actual_result_df = result_wrapper.df.sort_values(by="system_record_id").reset_index(drop=True)
|
|
2166
|
+
# actual_result_df.to_parquet(expected_result_path)
|
|
2166
2167
|
assert_frame_equal(actual_result_df, expected_result_df)
|
|
2167
2168
|
|
|
2168
2169
|
for i in range(5):
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pytest
|
|
4
|
+
from pandas.testing import assert_frame_equal
|
|
5
|
+
|
|
6
|
+
from upgini.errors import ValidationError
|
|
7
|
+
from upgini.metadata import SYSTEM_RECORD_ID, TARGET, ModelTaskType
|
|
8
|
+
from upgini.resource_bundle import bundle
|
|
9
|
+
from upgini.utils.target_utils import balance_undersample, define_task
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_invalid_target():
    """define_task raises ValidationError for empty and constant targets."""
    cases = [
        (pd.Series(["", "", ""]), "empty_target"),
        (pd.Series([np.nan, np.inf, -np.inf]), "empty_target"),
        (pd.Series([1, 1, 1, 1, 1]), "dataset_constant_target"),
    ]
    for y, message_key in cases:
        with pytest.raises(ValidationError, match=bundle.get(message_key)):
            define_task(y)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_define_binary_task_type():
    """Two distinct target values give BINARY, with or without a date key."""
    targets = (
        pd.Series([0, 1, 0, 1, 0, 1]),
        pd.Series(["a", "b", "a", "b", "a"]),
    )
    for y in targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.BINARY
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_define_multiclass_task_type():
    """Compact integer encodings and non-numeric labels give MULTICLASS."""
    targets = (
        pd.Series(range(1, 51)),
        pd.Series([float(x) for x in range(1, 51)]),
        pd.Series(range(0, 50)),
        pd.Series(["a", "b", "c", "b", "a"]),
        pd.Series(["0", "1", "2", "3", "a"]),
    )
    for y in targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.MULTICLASS

    # Few distinct integer-valued floats: multiclass only without a date key
    y = pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 3.0, 5.0, 3.0])
    assert define_task(y, False) == ModelTaskType.MULTICLASS
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_define_regression_task_type():
    """Numeric targets that are not a compact label encoding give REGRESSION."""
    # Same target as the last multiclass case, but with a date key present
    y = pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 3.0, 5.0, 3.0])
    assert define_task(y, True) == ModelTaskType.REGRESSION

    targets = (
        pd.Series([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.5]),
        pd.Series([0, 1, 2, 3, 4, 5, 6, 8]),
        pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0]),
    )
    for y in targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.REGRESSION
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_balance_undersampling_binary():
    """Binary undersampling: cut the majority, fill up to the minimum, or leave untouched."""
    source = pd.DataFrame({SYSTEM_RECORD_ID: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], TARGET: [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]})

    # Minority class count (1) is not below min_sample_threshold / 2 (1):
    # keep the whole minority class and 5x as many majority rows
    result = balance_undersample(
        source, TARGET, ModelTaskType.BINARY, 42, imbalance_threshold=0.1, min_sample_threshold=2
    )
    expected = pd.DataFrame({
        SYSTEM_RECORD_ID: [1, 2, 3, 7, 9, 10],
        TARGET: [0, 1, 0, 0, 0, 0]
    })
    assert_frame_equal(result.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True), expected)

    # Keep the whole minority class and fill up to min_sample_threshold (8) with majority rows
    result = balance_undersample(
        source, TARGET, ModelTaskType.BINARY, 42, imbalance_threshold=0.1, min_sample_threshold=8
    )
    expected = pd.DataFrame({
        SYSTEM_RECORD_ID: [1, 2, 3, 4, 6, 7, 9, 10],
        TARGET: [0, 1, 0, 0, 0, 0, 0, 0]
    })
    assert_frame_equal(result.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True), expected)

    # Majority class count (8) is less than 5x the minority class count (2): dataset is returned as is
    source = pd.DataFrame({"system_record_id": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], TARGET: [0, 1, 0, 0, 0, 0, 0, 0, 1, 0]})
    result = balance_undersample(
        source, "target", ModelTaskType.BINARY, 42, imbalance_threshold=0.1, min_sample_threshold=4
    )
    assert_frame_equal(result, source)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def test_balance_undersaampling_multiclass():
    """Multiclass undersampling: leave a mild imbalance alone, cut classes above the 2x limit."""
    # a - 1, b - 2, c - 3
    source = pd.DataFrame({
        SYSTEM_RECORD_ID: [1, 2, 3, 4, 5, 6],
        TARGET: ["a", "b", "c", "c", "b", "c"]
    })
    result = balance_undersample(
        source, TARGET, ModelTaskType.MULTICLASS, 42, imbalance_threshold=0.1, min_sample_threshold=10
    )
    # Majority class count (3) is less than 2x the 25% quantile class (b) count (2): dataset is returned as is
    assert_frame_equal(result, source)

    # a - 1, b - 2, c - 5, d - 3
    source = pd.DataFrame({
        SYSTEM_RECORD_ID: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        TARGET: ["a", "b", "c", "c", "c", "b", "c", "d", "d", "d", "c"]
    })
    result = balance_undersample(
        source, TARGET, ModelTaskType.MULTICLASS, 42, imbalance_threshold=0.1, min_sample_threshold=10
    )
    # All of the 25% quantile class (b) and the minor class (a) are kept;
    # major classes are cut to 2x of the quantile class (or kept whole if smaller)
    expected = pd.DataFrame({
        SYSTEM_RECORD_ID: [1, 2, 3, 4, 5, 6, 8, 9, 10, 11],
        TARGET: ["a", "b", "c", "c", "c", "b", "d", "d", "d", "c"]
    })
    assert_frame_equal(result.sort_values(by=SYSTEM_RECORD_ID).reset_index(drop=True), expected)
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* FingerprintJS v3.4.2 - Copyright (c) FingerprintJS, Inc, 2023 (https://fingerprint.com)
|
|
3
|
-
* Licensed under the MIT (http://www.opensource.org/licenses/mit-license.php) license.
|
|
4
|
-
*
|
|
5
|
-
* This software contains code from open-source projects:
|
|
6
|
-
* MurmurHash3 by Karan Lyons (https://github.com/karanlyons/murmurHash3.js)
|
|
7
|
-
*/
|
|
8
|
-
var e=function(){return e=Object.assign||function(e){for(var n,t=1,r=arguments.length;t<r;t++)for(var o in n=arguments[t])Object.prototype.hasOwnProperty.call(n,o)&&(e[o]=n[o]);return e},e.apply(this,arguments)};function n(e,n,t,r){return new(t||(t=Promise))((function(o,a){function i(e){try{u(r.next(e))}catch(n){a(n)}}function c(e){try{u(r.throw(e))}catch(n){a(n)}}function u(e){var n;e.done?o(e.value):(n=e.value,n instanceof t?n:new t((function(e){e(n)}))).then(i,c)}u((r=r.apply(e,n||[])).next())}))}function t(e,n){var t,r,o,a,i={label:0,sent:function(){if(1&o[0])throw o[1];return o[1]},trys:[],ops:[]};return a={next:c(0),throw:c(1),return:c(2)},"function"==typeof Symbol&&(a[Symbol.iterator]=function(){return this}),a;function c(c){return function(u){return function(c){if(t)throw new TypeError("Generator is already executing.");for(;a&&(a=0,c[0]&&(i=0)),i;)try{if(t=1,r&&(o=2&c[0]?r.return:c[0]?r.throw||((o=r.return)&&o.call(r),0):r.next)&&!(o=o.call(r,c[1])).done)return o;switch(r=0,o&&(c=[2&c[0],o.value]),c[0]){case 0:case 1:o=c;break;case 4:return i.label++,{value:c[1],done:!1};case 5:i.label++,r=c[1],c=[0];continue;case 7:c=i.ops.pop(),i.trys.pop();continue;default:if(!(o=i.trys,(o=o.length>0&&o[o.length-1])||6!==c[0]&&2!==c[0])){i=0;continue}if(3===c[0]&&(!o||c[1]>o[0]&&c[1]<o[3])){i.label=c[1];break}if(6===c[0]&&i.label<o[1]){i.label=o[1],o=c;break}if(o&&i.label<o[2]){i.label=o[2],i.ops.push(c);break}o[2]&&i.ops.pop(),i.trys.pop();continue}c=n.call(e,i)}catch(u){c=[6,u],r=0}finally{t=o=0}if(5&c[0])throw c[1];return{value:c[0]?c[1]:void 0,done:!0}}([c,u])}}}function r(e,n,t){if(t||2===arguments.length)for(var r,o=0,a=n.length;o<a;o++)!r&&o in n||(r||(r=Array.prototype.slice.call(n,0,o)),r[o]=n[o]);return e.concat(r||Array.prototype.slice.call(n))}function o(e,n){return new Promise((function(t){return setTimeout(t,e,n)}))}function a(e){return!!e&&"function"==typeof e.then}function i(e,n){try{var t=e();a(t)?t.then((function(e){return n(!0,e)}),(function(e){return 
n(!1,e)})):n(!0,t)}catch(r){n(!1,r)}}function c(e,r,a){return void 0===a&&(a=16),n(this,void 0,void 0,(function(){var n,i,c,u;return t(this,(function(t){switch(t.label){case 0:n=Array(e.length),i=Date.now(),c=0,t.label=1;case 1:return c<e.length?(n[c]=r(e[c],c),(u=Date.now())>=i+a?(i=u,[4,o(0)]):[3,3]):[3,4];case 2:t.sent(),t.label=3;case 3:return++c,[3,1];case 4:return[2,n]}}))}))}function u(e){e.then(void 0,(function(){}))}function l(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]+n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]+n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]+n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]+n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function s(e,n){e=[e[0]>>>16,65535&e[0],e[1]>>>16,65535&e[1]],n=[n[0]>>>16,65535&n[0],n[1]>>>16,65535&n[1]];var t=[0,0,0,0];return t[3]+=e[3]*n[3],t[2]+=t[3]>>>16,t[3]&=65535,t[2]+=e[2]*n[3],t[1]+=t[2]>>>16,t[2]&=65535,t[2]+=e[3]*n[2],t[1]+=t[2]>>>16,t[2]&=65535,t[1]+=e[1]*n[3],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[2]*n[2],t[0]+=t[1]>>>16,t[1]&=65535,t[1]+=e[3]*n[1],t[0]+=t[1]>>>16,t[1]&=65535,t[0]+=e[0]*n[3]+e[1]*n[2]+e[2]*n[1]+e[3]*n[0],t[0]&=65535,[t[0]<<16|t[1],t[2]<<16|t[3]]}function d(e,n){return 32===(n%=64)?[e[1],e[0]]:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n|e[0]>>>32-n]:(n-=32,[e[1]<<n|e[0]>>>32-n,e[0]<<n|e[1]>>>32-n])}function m(e,n){return 0===(n%=64)?e:n<32?[e[0]<<n|e[1]>>>32-n,e[1]<<n]:[e[1]<<n-32,0]}function f(e,n){return[e[0]^n[0],e[1]^n[1]]}function v(e){return e=f(e,[0,e[0]>>>1]),e=f(e=s(e,[4283543511,3981806797]),[0,e[0]>>>1]),e=f(e=s(e,[3301882366,444984403]),[0,e[0]>>>1])}function h(e,n){n=n||0;var 
t,r=(e=e||"").length%16,o=e.length-r,a=[0,n],i=[0,n],c=[0,0],u=[0,0],h=[2277735313,289559509],p=[1291169091,658871167];for(t=0;t<o;t+=16)c=[255&e.charCodeAt(t+4)|(255&e.charCodeAt(t+5))<<8|(255&e.charCodeAt(t+6))<<16|(255&e.charCodeAt(t+7))<<24,255&e.charCodeAt(t)|(255&e.charCodeAt(t+1))<<8|(255&e.charCodeAt(t+2))<<16|(255&e.charCodeAt(t+3))<<24],u=[255&e.charCodeAt(t+12)|(255&e.charCodeAt(t+13))<<8|(255&e.charCodeAt(t+14))<<16|(255&e.charCodeAt(t+15))<<24,255&e.charCodeAt(t+8)|(255&e.charCodeAt(t+9))<<8|(255&e.charCodeAt(t+10))<<16|(255&e.charCodeAt(t+11))<<24],c=d(c=s(c,h),31),a=l(a=d(a=f(a,c=s(c,p)),27),i),a=l(s(a,[0,5]),[0,1390208809]),u=d(u=s(u,p),33),i=l(i=d(i=f(i,u=s(u,h)),31),a),i=l(s(i,[0,5]),[0,944331445]);switch(c=[0,0],u=[0,0],r){case 15:u=f(u,m([0,e.charCodeAt(t+14)],48));case 14:u=f(u,m([0,e.charCodeAt(t+13)],40));case 13:u=f(u,m([0,e.charCodeAt(t+12)],32));case 12:u=f(u,m([0,e.charCodeAt(t+11)],24));case 11:u=f(u,m([0,e.charCodeAt(t+10)],16));case 10:u=f(u,m([0,e.charCodeAt(t+9)],8));case 9:u=s(u=f(u,[0,e.charCodeAt(t+8)]),p),i=f(i,u=s(u=d(u,33),h));case 8:c=f(c,m([0,e.charCodeAt(t+7)],56));case 7:c=f(c,m([0,e.charCodeAt(t+6)],48));case 6:c=f(c,m([0,e.charCodeAt(t+5)],40));case 5:c=f(c,m([0,e.charCodeAt(t+4)],32));case 4:c=f(c,m([0,e.charCodeAt(t+3)],24));case 3:c=f(c,m([0,e.charCodeAt(t+2)],16));case 2:c=f(c,m([0,e.charCodeAt(t+1)],8));case 1:c=s(c=f(c,[0,e.charCodeAt(t)]),h),a=f(a,c=s(c=d(c,31),p))}return a=l(a=f(a,[0,e.length]),i=f(i,[0,e.length])),i=l(i,a),a=l(a=v(a),i=v(i)),i=l(i,a),("00000000"+(a[0]>>>0).toString(16)).slice(-8)+("00000000"+(a[1]>>>0).toString(16)).slice(-8)+("00000000"+(i[0]>>>0).toString(16)).slice(-8)+("00000000"+(i[1]>>>0).toString(16)).slice(-8)}function p(e){return parseInt(e)}function b(e){return parseFloat(e)}function y(e,n){return"number"==typeof e&&isNaN(e)?n:e}function g(e){return e.reduce((function(e,n){return e+(n?1:0)}),0)}function w(e,n){if(void 0===n&&(n=1),Math.abs(n)>=1)return Math.round(e/n)*n;var t=1/n;return 
Math.round(e*t)/t}function L(e){return e&&"object"==typeof e&&"message"in e?e:{message:e}}function k(e){return"function"!=typeof e}function V(e,r,o){var a=Object.keys(e).filter((function(e){return!function(e,n){for(var t=0,r=e.length;t<r;++t)if(e[t]===n)return!0;return!1}(o,e)})),l=c(a,(function(n){return function(e,n){var t=new Promise((function(t){var r=Date.now();i(e.bind(null,n),(function(){for(var e=[],n=0;n<arguments.length;n++)e[n]=arguments[n];var o=Date.now()-r;if(!e[0])return t((function(){return{error:L(e[1]),duration:o}}));var a=e[1];if(k(a))return t((function(){return{value:a,duration:o}}));t((function(){return new Promise((function(e){var n=Date.now();i(a,(function(){for(var t=[],r=0;r<arguments.length;r++)t[r]=arguments[r];var a=o+Date.now()-n;if(!t[0])return e({error:L(t[1]),duration:a});e({value:t[1],duration:a})}))}))}))}))}));return u(t),function(){return t.then((function(e){return e()}))}}(e[n],r)}));return u(l),function(){return n(this,void 0,void 0,(function(){var e,n,r,o;return t(this,(function(t){switch(t.label){case 0:return[4,l];case 1:return[4,c(t.sent(),(function(e){var n=e();return u(n),n}))];case 2:return e=t.sent(),[4,Promise.all(e)];case 3:for(n=t.sent(),r={},o=0;o<a.length;++o)r[a[o]]=n[o];return[2,r]}}))}))}}function Z(e,n){var t=function(e){return k(e)?n(e):function(){var t=e();return a(t)?t.then(n):n(t)}};return function(n){var r=e(n);return a(r)?r.then(t):t(r)}}function W(){var e=window,n=navigator;return g(["MSCSSMatrix"in e,"msSetImmediate"in e,"msIndexedDB"in e,"msMaxTouchPoints"in n,"msPointerEnabled"in n])>=4}function C(){var e=window,n=navigator;return g(["msWriteProfilerMark"in e,"MSStream"in e,"msLaunchUri"in n,"msSaveBlob"in n])>=3&&!W()}function S(){var e=window,n=navigator;return g(["webkitPersistentStorage"in n,"webkitTemporaryStorage"in n,0===n.vendor.indexOf("Google"),"webkitResolveLocalFileSystemURL"in e,"BatteryManager"in e,"webkitMediaStream"in e,"webkitSpeechGrammar"in e])>=5}function x(){var 
e=window,n=navigator;return g(["ApplePayError"in e,"CSSPrimitiveValue"in e,"Counter"in e,0===n.vendor.indexOf("Apple"),"getStorageUpdates"in n,"WebKitMediaKeys"in e])>=4}function F(){var e=window;return g(["safari"in e,!("DeviceMotionEvent"in e),!("ongestureend"in e),!("standalone"in navigator)])>=3}function Y(){var e,n,t=window;return g(["buildID"in navigator,"MozAppearance"in(null!==(n=null===(e=document.documentElement)||void 0===e?void 0:e.style)&&void 0!==n?n:{}),"onmozfullscreenchange"in t,"mozInnerScreenX"in t,"CSSMozDocumentRule"in t,"CanvasCaptureMediaStream"in t])>=4}function M(){var e=document;return e.fullscreenElement||e.msFullscreenElement||e.mozFullScreenElement||e.webkitFullscreenElement||null}function G(){var e=S(),n=Y();if(!e&&!n)return!1;var t=window;return g(["onorientationchange"in t,"orientation"in t,e&&!("SharedWorker"in t),n&&/android/i.test(navigator.appVersion)])>=2}function R(e){var n=new Error(e);return n.name=e,n}function X(e,r,a){var i,c,u;return void 0===a&&(a=50),n(this,void 0,void 0,(function(){var n,l;return t(this,(function(t){switch(t.label){case 0:n=document,t.label=1;case 1:return n.body?[3,3]:[4,o(a)];case 2:return t.sent(),[3,1];case 3:l=n.createElement("iframe"),t.label=4;case 4:return t.trys.push([4,,10,11]),[4,new Promise((function(e,t){var o=!1,a=function(){o=!0,e()};l.onload=a,l.onerror=function(e){o=!0,t(e)};var i=l.style;i.setProperty("display","block","important"),i.position="absolute",i.top="0",i.left="0",i.visibility="hidden",r&&"srcdoc"in l?l.srcdoc=r:l.src="about:blank",n.body.appendChild(l);var c=function(){var e,n;o||("complete"===(null===(n=null===(e=l.contentWindow)||void 0===e?void 0:e.document)||void 0===n?void 0:n.readyState)?a():setTimeout(c,10))};c()}))];case 5:t.sent(),t.label=6;case 6:return(null===(c=null===(i=l.contentWindow)||void 0===i?void 0:i.document)||void 0===c?void 0:c.body)?[3,8]:[4,o(a)];case 7:return t.sent(),[3,6];case 8:return[4,e(l,l.contentWindow)];case 9:return[2,t.sent()];case 
10:return null===(u=l.parentNode)||void 0===u||u.removeChild(l),[7];case 11:return[2]}}))}))}function A(e){for(var n=function(e){for(var n,t,r="Unexpected syntax '".concat(e,"'"),o=/^\s*([a-z-]*)(.*)$/i.exec(e),a=o[1]||void 0,i={},c=/([.:#][\w-]+|\[.+?\])/gi,u=function(e,n){i[e]=i[e]||[],i[e].push(n)};;){var l=c.exec(o[2]);if(!l)break;var s=l[0];switch(s[0]){case".":u("class",s.slice(1));break;case"#":u("id",s.slice(1));break;case"[":var d=/^\[([\w-]+)([~|^$*]?=("(.*?)"|([\w-]+)))?(\s+[is])?\]$/.exec(s);if(!d)throw new Error(r);u(d[1],null!==(t=null!==(n=d[4])&&void 0!==n?n:d[5])&&void 0!==t?t:"");break;default:throw new Error(r)}}return[a,i]}(e),t=n[0],r=n[1],o=document.createElement(null!=t?t:"div"),a=0,i=Object.keys(r);a<i.length;a++){var c=i[a],u=r[c].join(" ");"style"===c?j(o.style,u):o.setAttribute(c,u)}return o}function j(e,n){for(var t=0,r=n.split(";");t<r.length;t++){var o=r[t],a=/^\s*([\w-]+)\s*:\s*(.+?)(\s*!([\w-]+))?\s*$/.exec(o);if(a){var i=a[1],c=a[2],u=a[4];e.setProperty(i,c,u||"")}}}var I=["monospace","sans-serif","serif"],J=["sans-serif-thin","ARNO PRO","Agency FB","Arabic Typesetting","Arial Unicode MS","AvantGarde Bk BT","BankGothic Md BT","Batang","Bitstream Vera Sans Mono","Calibri","Century","Century Gothic","Clarendon","EUROSTILE","Franklin Gothic","Futura Bk BT","Futura Md BT","GOTHAM","Gill Sans","HELV","Haettenschweiler","Helvetica Neue","Humanst521 BT","Leelawadee","Letter Gothic","Levenim MT","Lucida Bright","Lucida Sans","Menlo","MS Mincho","MS Outlook","MS Reference Specialty","MS UI Gothic","MT Extra","MYRIAD PRO","Marlett","Meiryo UI","Microsoft Uighur","Minion Pro","Monotype Corsiva","PMingLiU","Pristina","SCRIPTINA","Segoe UI Light","Serifa","SimHei","Small Fonts","Staccato222 BT","TRAJAN PRO","Univers CE 55 Medium","Vrinda","ZWAdobeF"];function H(e){return e.toDataURL()}var P,N;function z(){var e=this;return function(){if(void 0===N){var e=function(){var n=D();E(n)?N=setTimeout(e,2500):(P=n,N=void 0)};e()}}(),function(){return 
n(e,void 0,void 0,(function(){var e;return t(this,(function(n){switch(n.label){case 0:return E(e=D())?P?[2,r([],P,!0)]:M()?[4,(t=document,(t.exitFullscreen||t.msExitFullscreen||t.mozCancelFullScreen||t.webkitExitFullscreen).call(t))]:[3,2]:[3,2];case 1:n.sent(),e=D(),n.label=2;case 2:return E(e)||(P=e),[2,e]}var t}))}))}}function D(){var e=screen;return[y(b(e.availTop),null),y(b(e.width)-b(e.availWidth)-y(b(e.availLeft),0),null),y(b(e.height)-b(e.availHeight)-y(b(e.availTop),0),null),y(b(e.availLeft),null)]}function E(e){for(var n=0;n<4;++n)if(e[n])return!1;return!0}function T(e){var r;return n(this,void 0,void 0,(function(){var n,a,i,c,u,l,s;return t(this,(function(t){switch(t.label){case 0:for(n=document,a=n.createElement("div"),i=new Array(e.length),c={},B(a),s=0;s<e.length;++s)"DIALOG"===(u=A(e[s])).tagName&&u.show(),B(l=n.createElement("div")),l.appendChild(u),a.appendChild(l),i[s]=u;t.label=1;case 1:return n.body?[3,3]:[4,o(50)];case 2:return t.sent(),[3,1];case 3:n.body.appendChild(a);try{for(s=0;s<e.length;++s)i[s].offsetParent||(c[e[s]]=!0)}finally{null===(r=a.parentNode)||void 0===r||r.removeChild(a)}return[2,c]}}))}))}function B(e){e.style.setProperty("display","block","important")}function _(e){return matchMedia("(inverted-colors: ".concat(e,")")).matches}function O(e){return matchMedia("(forced-colors: ".concat(e,")")).matches}function U(e){return matchMedia("(prefers-contrast: ".concat(e,")")).matches}function Q(e){return matchMedia("(prefers-reduced-motion: ".concat(e,")")).matches}function K(e){return matchMedia("(dynamic-range: ".concat(e,")")).matches}var q=Math,$=function(){return 0};var ee={default:[],apple:[{font:"-apple-system-body"}],serif:[{fontFamily:"serif"}],sans:[{fontFamily:"sans-serif"}],mono:[{fontFamily:"monospace"}],min:[{fontSize:"1px"}],system:[{fontFamily:"system-ui"}]};var ne={fonts:function(){return X((function(e,n){var t=n.document,r=t.body;r.style.fontSize="48px";var o=t.createElement("div"),a={},i={},c=function(e){var 
n=t.createElement("span"),r=n.style;return r.position="absolute",r.top="0",r.left="0",r.fontFamily=e,n.textContent="mmMwWLliI0O&1",o.appendChild(n),n},u=I.map(c),l=function(){for(var e={},n=function(n){e[n]=I.map((function(e){return function(e,n){return c("'".concat(e,"',").concat(n))}(n,e)}))},t=0,r=J;t<r.length;t++){n(r[t])}return e}();r.appendChild(o);for(var s=0;s<I.length;s++)a[I[s]]=u[s].offsetWidth,i[I[s]]=u[s].offsetHeight;return J.filter((function(e){return n=l[e],I.some((function(e,t){return n[t].offsetWidth!==a[e]||n[t].offsetHeight!==i[e]}));var n}))}))},domBlockers:function(e){var r=(void 0===e?{}:e).debug;return n(this,void 0,void 0,(function(){var e,n,o,a,i;return t(this,(function(t){switch(t.label){case 0:return x()||G()?(c=atob,e={abpIndo:["#Iklan-Melayang","#Kolom-Iklan-728","#SidebarIklan-wrapper",'[title="ALIENBOLA" i]',c("I0JveC1CYW5uZXItYWRz")],abpvn:[".quangcao","#mobileCatfish",c("LmNsb3NlLWFkcw=="),'[id^="bn_bottom_fixed_"]',"#pmadv"],adBlockFinland:[".mainostila",c("LnNwb25zb3JpdA=="),".ylamainos",c("YVtocmVmKj0iL2NsaWNrdGhyZ2guYXNwPyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hcHAucmVhZHBlYWsuY29tL2FkcyJd")],adBlockPersian:["#navbar_notice_50",".kadr",'TABLE[width="140px"]',"#divAgahi",c("YVtocmVmXj0iaHR0cDovL2cxLnYuZndtcm0ubmV0L2FkLyJd")],adBlockWarningRemoval:["#adblock-honeypot",".adblocker-root",".wp_adblock_detect",c("LmhlYWRlci1ibG9ja2VkLWFk"),c("I2FkX2Jsb2NrZXI=")],adGuardAnnoyances:[".hs-sosyal","#cookieconsentdiv",'div[class^="app_gdpr"]',".as-oil",'[data-cypress="soft-push-notification-modal"]'],adGuardBase:[".BetterJsPopOverlay",c("I2FkXzMwMFgyNTA="),c("I2Jhbm5lcmZsb2F0MjI="),c("I2NhbXBhaWduLWJhbm5lcg=="),c("I0FkLUNvbnRlbnQ=")],adGuardChinese:[c("LlppX2FkX2FfSA=="),c("YVtocmVmKj0iLmh0aGJldDM0LmNvbSJd"),"#widget-quan",c("YVtocmVmKj0iLzg0OTkyMDIwLnh5eiJd"),c("YVtocmVmKj0iLjE5NTZobC5jb20vIl0=")],adGuardFrench:["#pavePub",c("LmFkLWRlc2t0b3AtcmVjdGFuZ2xl"),".mobile_adhesion",".widgetadv",c("LmFkc19iYW4=")],adGuardGerman:['aside[data-portal-id="le
aderboard"]'],adGuardJapanese:["#kauli_yad_1",c("YVtocmVmXj0iaHR0cDovL2FkMi50cmFmZmljZ2F0ZS5uZXQvIl0="),c("Ll9wb3BJbl9pbmZpbml0ZV9hZA=="),c("LmFkZ29vZ2xl"),c("Ll9faXNib29zdFJldHVybkFk")],adGuardMobile:[c("YW1wLWF1dG8tYWRz"),c("LmFtcF9hZA=="),'amp-embed[type="24smi"]',"#mgid_iframe1",c("I2FkX2ludmlld19hcmVh")],adGuardRussian:[c("YVtocmVmXj0iaHR0cHM6Ly9hZC5sZXRtZWFkcy5jb20vIl0="),c("LnJlY2xhbWE="),'div[id^="smi2adblock"]',c("ZGl2W2lkXj0iQWRGb3hfYmFubmVyXyJd"),"#psyduckpockeball"],adGuardSocial:[c("YVtocmVmXj0iLy93d3cuc3R1bWJsZXVwb24uY29tL3N1Ym1pdD91cmw9Il0="),c("YVtocmVmXj0iLy90ZWxlZ3JhbS5tZS9zaGFyZS91cmw/Il0="),".etsy-tweet","#inlineShare",".popup-social"],adGuardSpanishPortuguese:["#barraPublicidade","#Publicidade","#publiEspecial","#queTooltip",".cnt-publi"],adGuardTrackingProtection:["#qoo-counter",c("YVtocmVmXj0iaHR0cDovL2NsaWNrLmhvdGxvZy5ydS8iXQ=="),c("YVtocmVmXj0iaHR0cDovL2hpdGNvdW50ZXIucnUvdG9wL3N0YXQucGhwIl0="),c("YVtocmVmXj0iaHR0cDovL3RvcC5tYWlsLnJ1L2p1bXAiXQ=="),"#top100counter"],adGuardTurkish:["#backkapat",c("I3Jla2xhbWk="),c("YVtocmVmXj0iaHR0cDovL2Fkc2Vydi5vbnRlay5jb20udHIvIl0="),c("YVtocmVmXj0iaHR0cDovL2l6bGVuemkuY29tL2NhbXBhaWduLyJd"),c("YVtocmVmXj0iaHR0cDovL3d3dy5pbnN0YWxsYWRzLm5ldC8iXQ==")],bulgarian:[c("dGQjZnJlZW5ldF90YWJsZV9hZHM="),"#ea_intext_div",".lapni-pop-over","#xenium_hot_offers"],easyList:[".yb-floorad",c("LndpZGdldF9wb19hZHNfd2lkZ2V0"),c("LnRyYWZmaWNqdW5reS1hZA=="),".textad_headline",c("LnNwb25zb3JlZC10ZXh0LWxpbmtz")],easyListChina:[c("LmFwcGd1aWRlLXdyYXBbb25jbGljayo9ImJjZWJvcy5jb20iXQ=="),c("LmZyb250cGFnZUFkdk0="),"#taotaole","#aafoot.top_box",".cfa_popup"],easyListCookie:[".ezmob-footer",".cc-CookieWarning","[data-cookie-number]",c("LmF3LWNvb2tpZS1iYW5uZXI="),".sygnal24-gdpr-modal-wrap"],easyListCzechSlovak:["#onlajny-stickers",c("I3Jla2xhbW5pLWJveA=="),c("LnJla2xhbWEtbWVnYWJvYXJk"),".sklik",c("W2lkXj0ic2tsaWtSZWtsYW1hIl0=")],easyListDutch:[c("I2FkdmVydGVudGll"),c("I3ZpcEFkbWFya3RCYW5uZXJCbG9jaw=="),".adstekst",c("YVtocmVmXj0iaHR0cHM6Ly
94bHR1YmUubmwvY2xpY2svIl0="),"#semilo-lrectangle"],easyListGermany:["#SSpotIMPopSlider",c("LnNwb25zb3JsaW5rZ3J1ZW4="),c("I3dlcmJ1bmdza3k="),c("I3Jla2xhbWUtcmVjaHRzLW1pdHRl"),c("YVtocmVmXj0iaHR0cHM6Ly9iZDc0Mi5jb20vIl0=")],easyListItaly:[c("LmJveF9hZHZfYW5udW5jaQ=="),".sb-box-pubbliredazionale",c("YVtocmVmXj0iaHR0cDovL2FmZmlsaWF6aW9uaWFkcy5zbmFpLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZHNlcnZlci5odG1sLml0LyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9hZmZpbGlhemlvbmlhZHMuc25haS5pdC8iXQ==")],easyListLithuania:[c("LnJla2xhbW9zX3RhcnBhcw=="),c("LnJla2xhbW9zX251b3JvZG9z"),c("aW1nW2FsdD0iUmVrbGFtaW5pcyBza3lkZWxpcyJd"),c("aW1nW2FsdD0iRGVkaWt1b3RpLmx0IHNlcnZlcmlhaSJd"),c("aW1nW2FsdD0iSG9zdGluZ2FzIFNlcnZlcmlhaS5sdCJd")],estonian:[c("QVtocmVmKj0iaHR0cDovL3BheTRyZXN1bHRzMjQuZXUiXQ==")],fanboyAnnoyances:["#ac-lre-player",".navigate-to-top","#subscribe_popup",".newsletter_holder","#back-top"],fanboyAntiFacebook:[".util-bar-module-firefly-visible"],fanboyEnhancedTrackers:[".open.pushModal","#issuem-leaky-paywall-articles-zero-remaining-nag","#sovrn_container",'div[class$="-hide"][zoompage-fontsize][style="display: 
block;"]',".BlockNag__Card"],fanboySocial:["#FollowUs","#meteored_share","#social_follow",".article-sharer",".community__social-desc"],frellwitSwedish:[c("YVtocmVmKj0iY2FzaW5vcHJvLnNlIl1bdGFyZ2V0PSJfYmxhbmsiXQ=="),c("YVtocmVmKj0iZG9rdG9yLXNlLm9uZWxpbmsubWUiXQ=="),"article.category-samarbete",c("ZGl2LmhvbGlkQWRz"),"ul.adsmodern"],greekAdBlock:[c("QVtocmVmKj0iYWRtYW4ub3RlbmV0LmdyL2NsaWNrPyJd"),c("QVtocmVmKj0iaHR0cDovL2F4aWFiYW5uZXJzLmV4b2R1cy5nci8iXQ=="),c("QVtocmVmKj0iaHR0cDovL2ludGVyYWN0aXZlLmZvcnRobmV0LmdyL2NsaWNrPyJd"),"DIV.agores300","TABLE.advright"],hungarian:["#cemp_doboz",".optimonk-iframe-container",c("LmFkX19tYWlu"),c("W2NsYXNzKj0iR29vZ2xlQWRzIl0="),"#hirdetesek_box"],iDontCareAboutCookies:['.alert-info[data-block-track*="CookieNotice"]',".ModuleTemplateCookieIndicator",".o--cookies--container","#cookies-policy-sticky","#stickyCookieBar"],icelandicAbp:[c("QVtocmVmXj0iL2ZyYW1ld29yay9yZXNvdXJjZXMvZm9ybXMvYWRzLmFzcHgiXQ==")],latvian:[c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiAxMjBweDsgaGVpZ2h0OiA0MHB4OyBvdmVyZmxvdzogaGlkZGVuOyBwb3NpdGlvbjogcmVsYXRpdmU7Il0="),c("YVtocmVmPSJodHRwOi8vd3d3LnNhbGlkemluaS5sdi8iXVtzdHlsZT0iZGlzcGxheTogYmxvY2s7IHdpZHRoOiA4OHB4OyBoZWlnaHQ6IDMxcHg7IG92ZXJmbG93OiBoaWRkZW47IHBvc2l0aW9uOiByZWxhdGl2ZTsiXQ==")],listKr:[c("YVtocmVmKj0iLy9hZC5wbGFuYnBsdXMuY28ua3IvIl0="),c("I2xpdmVyZUFkV3JhcHBlcg=="),c("YVtocmVmKj0iLy9hZHYuaW1hZHJlcC5jby5rci8iXQ=="),c("aW5zLmZhc3R2aWV3LWFk"),".revenue_unit_item.dable"],listeAr:[c("LmdlbWluaUxCMUFk"),".right-and-left-sponsers",c("YVtocmVmKj0iLmFmbGFtLmluZm8iXQ=="),c("YVtocmVmKj0iYm9vcmFxLm9yZyJd"),c("YVtocmVmKj0iZHViaXp6bGUuY29tL2FyLz91dG1fc291cmNlPSJd")],listeFr:[c("YVtocmVmXj0iaHR0cDovL3Byb21vLnZhZG9yLmNvbS8iXQ=="),c("I2FkY29udGFpbmVyX3JlY2hlcmNoZQ=="),c("YVtocmVmKj0id2Vib3JhbWEuZnIvZmNnaS1iaW4vIl0="),".site-pub-interstitiel",'div[id^="crt-"][data-criteo-id]'],officialPolish:["#ceneo-placeholder-ceneo-12",c("W2hyZWZePSJodHRwczovL2FmZi5zZW5kaHViLnBsLyJd"),c("YVtocm
VmXj0iaHR0cDovL2Fkdm1hbmFnZXIudGVjaGZ1bi5wbC9yZWRpcmVjdC8iXQ=="),c("YVtocmVmXj0iaHR0cDovL3d3dy50cml6ZXIucGwvP3V0bV9zb3VyY2UiXQ=="),c("ZGl2I3NrYXBpZWNfYWQ=")],ro:[c("YVtocmVmXj0iLy9hZmZ0cmsuYWx0ZXgucm8vQ291bnRlci9DbGljayJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ibGFja2ZyaWRheXNhbGVzLnJvL3Ryay9zaG9wLyJd"),c("YVtocmVmXj0iaHR0cHM6Ly9ldmVudC4ycGVyZm9ybWFudC5jb20vZXZlbnRzL2NsaWNrIl0="),c("YVtocmVmXj0iaHR0cHM6Ly9sLnByb2ZpdHNoYXJlLnJvLyJd"),'a[href^="/url/"]'],ruAd:[c("YVtocmVmKj0iLy9mZWJyYXJlLnJ1LyJd"),c("YVtocmVmKj0iLy91dGltZy5ydS8iXQ=="),c("YVtocmVmKj0iOi8vY2hpa2lkaWtpLnJ1Il0="),"#pgeldiz",".yandex-rtb-block"],thaiAds:["a[href*=macau-uta-popup]",c("I2Fkcy1nb29nbGUtbWlkZGxlX3JlY3RhbmdsZS1ncm91cA=="),c("LmFkczMwMHM="),".bumq",".img-kosana"],webAnnoyancesUltralist:["#mod-social-share-2","#social-tools",c("LmN0cGwtZnVsbGJhbm5lcg=="),".zergnet-recommend",".yt.btn-link.btn-md.btn"]},n=Object.keys(e),[4,T((i=[]).concat.apply(i,n.map((function(n){return e[n]}))))]):[2,void 0];case 1:return o=t.sent(),r&&function(e,n){for(var t="DOM blockers debug:\n```",r=0,o=Object.keys(e);r<o.length;r++){var a=o[r];t+="\n".concat(a,":");for(var i=0,c=e[a];i<c.length;i++){var u=c[i];t+="\n ".concat(n[u]?"🚫":"➡️"," ").concat(u)}}console.log("".concat(t,"\n```"))}(e,o),(a=n.filter((function(n){var t=e[n];return g(t.map((function(e){return o[e]})))>.6*t.length}))).sort(),[2,a]}var c}))}))},fontPreferences:function(){return function(e,n){void 0===n&&(n=4e3);return X((function(t,o){var a=o.document,i=a.body,c=i.style;c.width="".concat(n,"px"),c.webkitTextSizeAdjust=c.textSizeAdjust="none",S()?i.style.zoom="".concat(1/o.devicePixelRatio):x()&&(i.style.zoom="reset");var u=a.createElement("div");return u.textContent=r([],Array(n/20<<0),!0).map((function(){return"word"})).join(" "),i.appendChild(u),e(a,i)}),'<!doctype html><html><head><meta name="viewport" content="width=device-width, initial-scale=1">')}((function(e,n){for(var t={},r={},o=0,a=Object.keys(ee);o<a.length;o++){var i=a[o],c=ee[i],u=c[0],l=void 
0===u?{}:u,s=c[1],d=void 0===s?"mmMwWLliI0fiflO&1":s,m=e.createElement("span");m.textContent=d,m.style.whiteSpace="nowrap";for(var f=0,v=Object.keys(l);f<v.length;f++){var h=v[f],p=l[h];void 0!==p&&(m.style[h]=p)}t[i]=m,n.appendChild(e.createElement("br")),n.appendChild(m)}for(var b=0,y=Object.keys(ee);b<y.length;b++){r[i=y[b]]=t[i].getBoundingClientRect().width}return r}))},audio:function(){var e=window,n=e.OfflineAudioContext||e.webkitOfflineAudioContext;if(!n)return-2;if(x()&&!F()&&!function(){var e=window;return g(["DOMRectList"in e,"RTCPeerConnectionIceEvent"in e,"SVGGeometryElement"in e,"ontransitioncancel"in e])>=3}())return-1;var t=new n(1,5e3,44100),r=t.createOscillator();r.type="triangle",r.frequency.value=1e4;var o=t.createDynamicsCompressor();o.threshold.value=-50,o.knee.value=40,o.ratio.value=12,o.attack.value=0,o.release.value=.25,r.connect(o),o.connect(t.destination),r.start(0);var i=function(e){var n=3,t=500,r=500,o=5e3,i=function(){};return[new Promise((function(c,l){var s=!1,d=0,m=0;e.oncomplete=function(e){return c(e.renderedBuffer)};var f=function(){setTimeout((function(){return l(R("timeout"))}),Math.min(r,m+o-Date.now()))},v=function(){try{var r=e.startRendering();switch(a(r)&&u(r),e.state){case"running":m=Date.now(),s&&f();break;case"suspended":document.hidden||d++,s&&d>=n?l(R("suspended")):setTimeout(v,t)}}catch(o){l(o)}};v(),i=function(){s||(s=!0,m>0&&f())}})),i]}(t),c=i[0],l=i[1],s=c.then((function(e){return function(e){for(var n=0,t=0;t<e.length;++t)n+=Math.abs(e[t]);return n}(e.getChannelData(0).subarray(4500))}),(function(e){if("timeout"===e.name||"suspended"===e.name)return-3;throw e}));return u(s),function(){return l(),s}},screenFrame:function(){var e=this,r=z();return function(){return n(e,void 0,void 0,(function(){var e,n;return t(this,(function(t){switch(t.label){case 0:return[4,r()];case 1:return e=t.sent(),[2,[(n=function(e){return null===e?null:w(e,10)})(e[0]),n(e[1]),n(e[2]),n(e[3])]]}}))}))}},osCpu:function(){return 
navigator.oscpu},languages:function(){var e,n=navigator,t=[],r=n.language||n.userLanguage||n.browserLanguage||n.systemLanguage;if(void 0!==r&&t.push([r]),Array.isArray(n.languages))S()&&g([!("MediaSettingsRange"in(e=window)),"RTCEncodedAudioFrame"in e,""+e.Intl=="[object Intl]",""+e.Reflect=="[object Reflect]"])>=3||t.push(n.languages);else if("string"==typeof n.languages){var o=n.languages;o&&t.push(o.split(","))}return t},colorDepth:function(){return window.screen.colorDepth},deviceMemory:function(){return y(b(navigator.deviceMemory),void 0)},screenResolution:function(){var e=screen,n=function(e){return y(p(e),null)},t=[n(e.width),n(e.height)];return t.sort().reverse(),t},hardwareConcurrency:function(){return y(p(navigator.hardwareConcurrency),void 0)},timezone:function(){var e,n=null===(e=window.Intl)||void 0===e?void 0:e.DateTimeFormat;if(n){var t=(new n).resolvedOptions().timeZone;if(t)return t}var r,o=(r=(new Date).getFullYear(),-Math.max(b(new Date(r,0,1).getTimezoneOffset()),b(new Date(r,6,1).getTimezoneOffset())));return"UTC".concat(o>=0?"+":"").concat(Math.abs(o))},sessionStorage:function(){try{return!!window.sessionStorage}catch(e){return!0}},localStorage:function(){try{return!!window.localStorage}catch(e){return!0}},indexedDB:function(){if(!W()&&!C())try{return!!window.indexedDB}catch(e){return!0}},openDatabase:function(){return!!window.openDatabase},cpuClass:function(){return navigator.cpuClass},platform:function(){var e=navigator.platform;return"MacIntel"===e&&x()&&!F()?function(){if("iPad"===navigator.platform)return!0;var e=screen,n=e.width/e.height;return g(["MediaSource"in window,!!Element.prototype.webkitRequestFullscreen,n>.65&&n<1.53])>=2}()?"iPad":"iPhone":e},plugins:function(){var e=navigator.plugins;if(e){for(var n=[],t=0;t<e.length;++t){var r=e[t];if(r){for(var o=[],a=0;a<r.length;++a){var i=r[a];o.push({type:i.type,suffixes:i.suffixes})}n.push({name:r.name,description:r.description,mimeTypes:o})}}return n}},canvas:function(){var 
e,n,t=!1,r=function(){var e=document.createElement("canvas");return e.width=1,e.height=1,[e,e.getContext("2d")]}(),o=r[0],a=r[1];if(function(e,n){return!(!n||!e.toDataURL)}(o,a)){t=function(e){return e.rect(0,0,10,10),e.rect(2,2,6,6),!e.isPointInPath(5,5,"evenodd")}(a),function(e,n){e.width=240,e.height=60,n.textBaseline="alphabetic",n.fillStyle="#f60",n.fillRect(100,1,62,20),n.fillStyle="#069",n.font='11pt "Times New Roman"';var t="Cwm fjordbank gly ".concat(String.fromCharCode(55357,56835));n.fillText(t,2,15),n.fillStyle="rgba(102, 204, 0, 0.2)",n.font="18pt Arial",n.fillText(t,4,45)}(o,a);var i=H(o);i!==H(o)?e=n="unstable":(n=i,function(e,n){e.width=122,e.height=110,n.globalCompositeOperation="multiply";for(var t=0,r=[["#f2f",40,40],["#2ff",80,40],["#ff2",60,80]];t<r.length;t++){var o=r[t],a=o[0],i=o[1],c=o[2];n.fillStyle=a,n.beginPath(),n.arc(i,c,40,0,2*Math.PI,!0),n.closePath(),n.fill()}n.fillStyle="#f9c",n.arc(60,60,60,0,2*Math.PI,!0),n.arc(60,60,20,0,2*Math.PI,!0),n.fill("evenodd")}(o,a),e=H(o))}else e=n="";return{winding:t,geometry:e,text:n}},touchSupport:function(){var e,n=navigator,t=0;void 0!==n.maxTouchPoints?t=p(n.maxTouchPoints):void 0!==n.msMaxTouchPoints&&(t=n.msMaxTouchPoints);try{document.createEvent("TouchEvent"),e=!0}catch(r){e=!1}return{maxTouchPoints:t,touchEvent:e,touchStart:"ontouchstart"in window}},vendor:function(){return navigator.vendor||""},vendorFlavors:function(){for(var e=[],n=0,t=["chrome","safari","__crWeb","__gCrWeb","yandex","__yb","__ybro","__firefox__","__edgeTrackingPreventionStatistics","webkit","oprt","samsungAr","ucweb","UCShellJava","puffinDevice"];n<t.length;n++){var r=t[n],o=window[r];o&&"object"==typeof o&&e.push(r)}return e.sort()},cookiesEnabled:function(){var e=document;try{e.cookie="cookietest=1; SameSite=Strict;";var n=-1!==e.cookie.indexOf("cookietest=");return e.cookie="cookietest=1; SameSite=Strict; expires=Thu, 01-Jan-1970 00:00:01 GMT",n}catch(t){return!1}},colorGamut:function(){for(var 
e=0,n=["rec2020","p3","srgb"];e<n.length;e++){var t=n[e];if(matchMedia("(color-gamut: ".concat(t,")")).matches)return t}},invertedColors:function(){return!!_("inverted")||!_("none")&&void 0},forcedColors:function(){return!!O("active")||!O("none")&&void 0},monochrome:function(){if(matchMedia("(min-monochrome: 0)").matches){for(var e=0;e<=100;++e)if(matchMedia("(max-monochrome: ".concat(e,")")).matches)return e;throw new Error("Too high value")}},contrast:function(){return U("no-preference")?0:U("high")||U("more")?1:U("low")||U("less")?-1:U("forced")?10:void 0},reducedMotion:function(){return!!Q("reduce")||!Q("no-preference")&&void 0},hdr:function(){return!!K("high")||!K("standard")&&void 0},math:function(){var e,n=q.acos||$,t=q.acosh||$,r=q.asin||$,o=q.asinh||$,a=q.atanh||$,i=q.atan||$,c=q.sin||$,u=q.sinh||$,l=q.cos||$,s=q.cosh||$,d=q.tan||$,m=q.tanh||$,f=q.exp||$,v=q.expm1||$,h=q.log1p||$;return{acos:n(.12312423423423424),acosh:t(1e308),acoshPf:(e=1e154,q.log(e+q.sqrt(e*e-1))),asin:r(.12312423423423424),asinh:o(1),asinhPf:function(e){return q.log(e+q.sqrt(e*e+1))}(1),atanh:a(.5),atanhPf:function(e){return q.log((1+e)/(1-e))/2}(.5),atan:i(.5),sin:c(-1e300),sinh:u(1),sinhPf:function(e){return q.exp(e)-1/q.exp(e)/2}(1),cos:l(10.000000000123),cosh:s(1),coshPf:function(e){return(q.exp(e)+1/q.exp(e))/2}(1),tan:d(-1e300),tanh:m(1),tanhPf:function(e){return(q.exp(2*e)-1)/(q.exp(2*e)+1)}(1),exp:f(1),expm1:v(1),expm1Pf:function(e){return q.exp(e)-1}(1),log1p:h(10),log1pPf:function(e){return q.log(1+e)}(10),powPI:function(e){return q.pow(q.PI,e)}(-100)}},videoCard:function(){var e,n=document.createElement("canvas"),t=null!==(e=n.getContext("webgl"))&&void 0!==e?e:n.getContext("experimental-webgl");if(t&&"getExtension"in t){var r=t.getExtension("WEBGL_debug_renderer_info");if(r)return{vendor:(t.getParameter(r.UNMASKED_VENDOR_WEBGL)||"").toString(),renderer:(t.getParameter(r.UNMASKED_RENDERER_WEBGL)||"").toString()}}},pdfViewerEnabled:function(){return 
navigator.pdfViewerEnabled},architecture:function(){var e=new Float32Array(1),n=new Uint8Array(e.buffer);return e[0]=1/0,e[0]=e[0]-e[0],n[3]}};function te(e){var n=function(e){if(G())return.4;if(x())return F()?.5:.3;var n=e.platform.value||"";if(/^Win/.test(n))return.6;if(/^Mac/.test(n))return.5;return.7}(e),t=function(e){return w(.99+.01*e,1e-4)}(n);return{score:n,comment:"$ if upgrade to Pro: https://fpjs.dev/pro".replace(/\$/g,"".concat(t))}}function re(n){return JSON.stringify(n,(function(n,t){return t instanceof Error?e({name:(r=t).name,message:r.message,stack:null===(o=r.stack)||void 0===o?void 0:o.split("\n")},r):t;var r,o}),2)}function oe(e){return h(function(e){for(var n="",t=0,r=Object.keys(e).sort();t<r.length;t++){var o=r[t],a=e[o],i=a.error?"error":JSON.stringify(a.value);n+="".concat(n?"|":"").concat(o.replace(/([:|\\])/g,"\\$1"),":").concat(i)}return n}(e))}function ae(e){return void 0===e&&(e=50),function(e,n){void 0===n&&(n=1/0);var t=window.requestIdleCallback;return t?new Promise((function(e){return t.call(window,(function(){return e()}),{timeout:n})})):o(Math.min(e,n))}(e,2*e)}function ie(e,r){var o=Date.now();return{get:function(a){return n(this,void 0,void 0,(function(){var n,i,c;return t(this,(function(t){switch(t.label){case 0:return n=Date.now(),[4,e()];case 1:return i=t.sent(),c=function(e){var n;return{get visitorId(){return void 0===n&&(n=oe(this.components)),n},set visitorId(e){n=e},confidence:te(e),components:e,version:"3.4.2"}}(i),(r||(null==a?void 0:a.debug))&&console.log("Copy the text below to get the debug data:\n\n```\nversion: ".concat(c.version,"\nuserAgent: ").concat(navigator.userAgent,"\ntimeBetweenLoadAndGet: ").concat(n-o,"\nvisitorId: ").concat(c.visitorId,"\ncomponents: ").concat(re(i),"\n```")),[2,c]}}))}))}}}function ce(e){var r=void 0===e?{}:e,o=r.delayFallback,a=r.debug;return r.monitoring,n(this,void 0,void 0,(function(){return t(this,(function(e){switch(e.label){case 0:return[4,ae(o)];case 1:return 
e.sent(),[2,ie(V(ne,{debug:a},[]),a)]}}))}))}var ue={load:ce,hashComponents:oe,componentsToDebugString:re},le=h;export{re as componentsToDebugString,ue as default,M as getFullscreenElement,z as getScreenFrame,oe as hashComponents,G as isAndroid,S as isChromium,F as isDesktopSafari,C as isEdgeHTML,Y as isGecko,W as isTrident,x as isWebKit,ce as load,V as loadSources,le as murmurX64Hash128,ae as prepareForSources,ne as sources,Z as transformSource,X as withIframe};
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from typing import Optional, Union
|
|
3
|
-
|
|
4
|
-
import numpy as np
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from pandas.api.types import is_numeric_dtype
|
|
7
|
-
|
|
8
|
-
from upgini.errors import ValidationError
|
|
9
|
-
from upgini.metadata import ModelTaskType
|
|
10
|
-
from upgini.resource_bundle import bundle
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
    """Encode target values as integer category codes.

    Every value is cast to ``str`` first, then to a pandas categorical; the
    categorical codes are returned in the same container kind that was passed in.

    Args:
        y: Target values as a pandas Series or a NumPy array.

    Returns:
        A Series of codes when ``y`` is a Series, an ndarray of codes when ``y``
        is an ndarray. Any other input type yields ``None``.
    """
    given_series = isinstance(y, pd.Series)
    if not given_series and not isinstance(y, np.ndarray):
        # Unsupported container: there is no matching branch, so nothing is returned.
        return None

    as_series = y if given_series else pd.Series(y)
    codes = as_series.astype(str).astype("category").cat.codes
    return codes if given_series else codes.values
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def define_task(
    y: pd.Series, has_date: bool = False, logger: Optional[logging.Logger] = None, silent: bool = False
) -> ModelTaskType:
    """Detect the model task type from the target column.

    Missing values are dropped first; for numeric targets non-finite values are
    dropped too, otherwise empty strings are dropped. The remaining values decide
    the task:

    * exactly two distinct values -> ``BINARY``;
    * values that cannot be converted to numbers -> ``MULTICLASS``;
    * at most 50 distinct numeric values forming an integer encoding
      (see ``is_int_encoding``) -> ``MULTICLASS``;
    * otherwise ``REGRESSION`` when ``has_date`` is true;
    * otherwise ``REGRESSION`` when any value is non-integer, when there are more
      than 50 distinct non-zero values, or when distinct non-zero values make up
      more than 20% of the non-zero rows; ``MULTICLASS`` in the remaining case.

    Args:
        y: Target values.
        has_date: Whether the dataset has a date; switches the fallback to regression.
        logger: Logger for the detected type; the root logger is used when omitted.
        silent: When true, the detected type is not printed.

    Returns:
        The detected ``ModelTaskType``.

    Raises:
        ValidationError: If no usable target values remain or the target is constant.
    """
    log = logging.getLogger() if logger is None else logger

    target = y.dropna()
    if is_numeric_dtype(target):
        keep_mask = np.isfinite(target)
    else:
        keep_mask = target != ""
    target = target.loc[keep_mask]
    if len(target) == 0:
        raise ValidationError(bundle.get("empty_target"))

    distinct_count = target.nunique()
    if distinct_count == 1:
        raise ValidationError(bundle.get("dataset_constant_target"))

    if distinct_count == 2:
        task = ModelTaskType.BINARY
    else:
        try:
            target = pd.to_numeric(target)
        except Exception:
            # If any value is non numeric - multiclass
            task = ModelTaskType.MULTICLASS
        else:
            if target.nunique() <= 50 and is_int_encoding(target.unique()):
                task = ModelTaskType.MULTICLASS
            elif has_date:
                task = ModelTaskType.REGRESSION
            else:
                non_zero_target = target[target != 0]
                non_zero_distinct = non_zero_target.nunique()
                distinct_ratio = non_zero_distinct / len(non_zero_target)

                # Float column holding at least one value with a fractional part.
                has_non_integer = target.dtype.kind == "f" and np.any(target != target.astype(int))
                too_many_values = non_zero_distinct > 50
                too_sparse = distinct_ratio > 0.2

                if has_non_integer or too_many_values or too_sparse:
                    task = ModelTaskType.REGRESSION
                else:
                    task = ModelTaskType.MULTICLASS

    log.info(f"Detected task type: {task}")
    if not silent:
        print(bundle.get("target_type_detected").format(task))
    return task
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def is_int_encoding(unique_values):
    """Tell whether the values are exactly ``0..n-1`` or ``1..n``.

    ``n`` is ``len(unique_values)``; the comparison is done on sets, so the
    order of the values does not matter.
    """
    observed = set(unique_values)
    count = len(unique_values)
    zero_based = set(range(count))
    one_based = set(range(1, count + 1))
    return observed == zero_based or observed == one_based
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from upgini.errors import ValidationError
|
|
6
|
-
from upgini.metadata import ModelTaskType
|
|
7
|
-
from upgini.resource_bundle import bundle
|
|
8
|
-
from upgini.utils.target_utils import define_task
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def test_invalid_target():
    """Empty-string, NaN/infinite and constant targets raise ValidationError with the matching bundle message."""
    rejected_targets = [
        (pd.Series(["", "", ""]), "empty_target"),
        (pd.Series([np.nan, np.inf, -np.inf]), "empty_target"),
        (pd.Series([1, 1, 1, 1, 1]), "dataset_constant_target"),
    ]
    for y, message_key in rejected_targets:
        with pytest.raises(ValidationError, match=bundle.get(message_key)):
            define_task(y)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def test_define_binary_task_type():
    """Two-valued targets, numeric or string, are detected as BINARY with and without a date."""
    two_valued_targets = (
        pd.Series([0, 1, 0, 1, 0, 1]),
        pd.Series(["a", "b", "a", "b", "a"]),
    )
    for y in two_valued_targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.BINARY
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def test_define_multiclass_task_type():
    """Integer-encoded and string targets with more than two values are detected as MULTICLASS."""
    # These targets are multiclass regardless of the has_date flag.
    date_independent_targets = (
        pd.Series(range(1, 51)),
        pd.Series([float(x) for x in range(1, 51)]),
        pd.Series(range(0, 50)),
        pd.Series(["a", "b", "c", "b", "a"]),
        pd.Series(["0", "1", "2", "3", "a"]),
    )
    for y in date_independent_targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.MULTICLASS

    # Few repeated whole-number floats: multiclass only when there is no date.
    y = pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 3.0, 5.0, 3.0])
    assert define_task(y, False) == ModelTaskType.MULTICLASS
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def test_define_regression_task_type():
    """Targets that are not an integer encoding fall back to REGRESSION in the cases below."""
    # Few repeated whole-number floats become regression once a date is present.
    y = pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 5.0, 0.0, 5.0, 0.0, 3.0, 0.0, 3.0, 5.0, 3.0])
    assert define_task(y, True) == ModelTaskType.REGRESSION

    # These targets are regression regardless of the has_date flag.
    date_independent_targets = (
        pd.Series([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.5]),
        pd.Series([0, 1, 2, 3, 4, 5, 6, 8]),
        pd.Series([0.0, 3.0, 5.0, 0.0, 5.0, 0.0, 3.0]),
    )
    for y in date_independent_targets:
        for has_date in (False, True):
            assert define_task(y, has_date) == ModelTaskType.REGRESSION
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.260 → upgini-1.1.261a3233.post5}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|