upgini 1.1.236a2__tar.gz → 1.1.237a2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.1.236a2/src/upgini.egg-info → upgini-1.1.237a2}/PKG-INFO +1 -1
- {upgini-1.1.236a2 → upgini-1.1.237a2}/setup.py +1 -1
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/dataset.py +10 -1
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/features_enricher.py +17 -8
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/datetime_utils.py +16 -3
- upgini-1.1.237a2/src/upgini/utils/deduplicate_utils.py +72 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini.egg-info/SOURCES.txt +1 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/LICENSE +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/README.md +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/pyproject.toml +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/setup.cfg +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/ads.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/errors.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/http.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/metadata.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/metrics.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/search_task.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/spinner.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_country_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_email_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_metrics.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.236a2 → upgini-1.1.237a2}/tests/test_widget.py +0 -0
|
@@ -36,12 +36,14 @@ from upgini.metadata import (
|
|
|
36
36
|
NumericInterval,
|
|
37
37
|
RuntimeParameters,
|
|
38
38
|
SearchCustomization,
|
|
39
|
+
SearchKey,
|
|
39
40
|
)
|
|
40
41
|
from upgini.normalizer.phone_normalizer import PhoneNormalizer
|
|
41
42
|
from upgini.resource_bundle import bundle
|
|
42
43
|
from upgini.sampler.random_under_sampler import RandomUnderSampler
|
|
43
44
|
from upgini.search_task import SearchTask
|
|
44
45
|
from upgini.utils import combine_search_keys
|
|
46
|
+
from upgini.utils.deduplicate_utils import remove_fintech_duplicates
|
|
45
47
|
from upgini.utils.email_utils import EmailSearchKeyConverter
|
|
46
48
|
|
|
47
49
|
try:
|
|
@@ -382,7 +384,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
382
384
|
|
|
383
385
|
if is_string_dtype(self.data[postal_code]):
|
|
384
386
|
try:
|
|
385
|
-
self.data[postal_code] = self.data[postal_code].astype("
|
|
387
|
+
self.data[postal_code] = self.data[postal_code].astype("float64").astype("Int64").astype("string")
|
|
386
388
|
except Exception:
|
|
387
389
|
pass
|
|
388
390
|
elif is_float_dtype(self.data[postal_code]):
|
|
@@ -820,6 +822,13 @@ class Dataset: # (pd.DataFrame):
|
|
|
820
822
|
self.__validate_dataset(validate_target, silent_mode)
|
|
821
823
|
|
|
822
824
|
if validate_target:
|
|
825
|
+
search_keys = {
|
|
826
|
+
col: SearchKey.from_meaning_type(key_type)
|
|
827
|
+
for col, key_type in self.meaning_types.items()
|
|
828
|
+
if SearchKey.from_meaning_type(key_type) is not None
|
|
829
|
+
}
|
|
830
|
+
self.data = remove_fintech_duplicates(self.data, search_keys, self.logger)
|
|
831
|
+
|
|
823
832
|
self.__validate_target()
|
|
824
833
|
|
|
825
834
|
self.__resample()
|
|
@@ -64,6 +64,7 @@ from upgini.utils.datetime_utils import (
|
|
|
64
64
|
is_blocked_time_series,
|
|
65
65
|
is_time_series,
|
|
66
66
|
)
|
|
67
|
+
from upgini.utils.deduplicate_utils import remove_fintech_duplicates
|
|
67
68
|
from upgini.utils.display_utils import (
|
|
68
69
|
display_html_dataframe,
|
|
69
70
|
do_without_pandas_limits,
|
|
@@ -1183,8 +1184,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1183
1184
|
converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger)
|
|
1184
1185
|
extended_X = converter.convert(extended_X, keep_time=True)
|
|
1185
1186
|
generated_features.extend(converter.generated_features)
|
|
1186
|
-
email_column = self.
|
|
1187
|
-
hem_column = self.
|
|
1187
|
+
email_column = self._get_email_column(search_keys)
|
|
1188
|
+
hem_column = self._get_hem_column(search_keys)
|
|
1188
1189
|
if email_column:
|
|
1189
1190
|
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
1190
1191
|
extended_X = converter.convert(extended_X)
|
|
@@ -1505,6 +1506,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1505
1506
|
eval_df_with_index[TARGET] = eval_y
|
|
1506
1507
|
eval_df_with_index[EVAL_SET_INDEX] = idx + 1
|
|
1507
1508
|
df_with_eval_set_index = pd.concat([df_with_eval_set_index, eval_df_with_index])
|
|
1509
|
+
|
|
1510
|
+
df_with_eval_set_index = remove_fintech_duplicates(df_with_eval_set_index, self.search_keys, self.logger)
|
|
1508
1511
|
|
|
1509
1512
|
# downsample if need to eval_set threshold
|
|
1510
1513
|
num_samples = _num_samples(df_with_eval_set_index)
|
|
@@ -1741,8 +1744,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1741
1744
|
generated_features.extend(converter.generated_features)
|
|
1742
1745
|
else:
|
|
1743
1746
|
self.logger.info("Input dataset hasn't date column")
|
|
1744
|
-
email_column = self.
|
|
1745
|
-
hem_column = self.
|
|
1747
|
+
email_column = self._get_email_column(search_keys)
|
|
1748
|
+
hem_column = self._get_hem_column(search_keys)
|
|
1746
1749
|
email_converted_to_hem = False
|
|
1747
1750
|
if email_column:
|
|
1748
1751
|
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
|
|
@@ -2081,8 +2084,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2081
2084
|
self.fit_generated_features.extend(converter.generated_features)
|
|
2082
2085
|
else:
|
|
2083
2086
|
self.logger.info("Input dataset hasn't date column")
|
|
2084
|
-
email_column = self.
|
|
2085
|
-
hem_column = self.
|
|
2087
|
+
email_column = self._get_email_column(self.fit_search_keys)
|
|
2088
|
+
hem_column = self._get_hem_column(self.fit_search_keys)
|
|
2086
2089
|
email_converted_to_hem = False
|
|
2087
2090
|
if email_column:
|
|
2088
2091
|
converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)
|
|
@@ -2615,16 +2618,22 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2615
2618
|
return [col for col, t in search_keys.items() if t not in [SearchKey.DATE, SearchKey.DATETIME]]
|
|
2616
2619
|
|
|
2617
2620
|
@staticmethod
|
|
2618
|
-
def
|
|
2621
|
+
def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2619
2622
|
for col, t in search_keys.items():
|
|
2620
2623
|
if t == SearchKey.EMAIL:
|
|
2621
2624
|
return col
|
|
2622
2625
|
|
|
2623
2626
|
@staticmethod
|
|
2624
|
-
def
|
|
2627
|
+
def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2625
2628
|
for col, t in search_keys.items():
|
|
2626
2629
|
if t == SearchKey.HEM:
|
|
2627
2630
|
return col
|
|
2631
|
+
|
|
2632
|
+
@staticmethod
|
|
2633
|
+
def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
|
|
2634
|
+
for col, t in search_keys.items():
|
|
2635
|
+
if t == SearchKey.PHONE:
|
|
2636
|
+
return col
|
|
2628
2637
|
|
|
2629
2638
|
def __add_fit_system_record_id(
|
|
2630
2639
|
self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
|
|
@@ -142,6 +142,7 @@ dataset_empty_column_names=Some column names are empty. Add names please
|
|
|
142
142
|
dataset_too_long_column_name=Column {} is too long: {} characters. Remove this column or trim length to 50 characters
|
|
143
143
|
dataset_full_duplicates=\nWARNING: {:.5f}% of the rows are fully duplicated
|
|
144
144
|
dataset_diff_target_duplicates=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates with different y values. These rows will be deleted as incorrect\nIncorrect row indexes: {}
|
|
145
|
+
dataset_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
|
|
145
146
|
dataset_drop_old_dates=\nWARNING: We don't have data before '2000-01-01' and removed all earlier records from the search dataset
|
|
146
147
|
dataset_all_dates_old=There is empty train dataset after removing data before '2000-01-01'
|
|
147
148
|
dataset_invalid_target_type=Unexpected dtype of target for binary task type: {}. Expected int or bool
|
|
@@ -61,9 +61,22 @@ class DateTimeSearchKeyConverter:
|
|
|
61
61
|
elif is_period_dtype(df[self.date_column]):
|
|
62
62
|
df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
|
|
63
63
|
elif is_numeric_dtype(df[self.date_column]):
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
# 315532801 - 2524608001 - seconds
|
|
65
|
+
# 315532801000 - 2524608001000 - milliseconds
|
|
66
|
+
# 315532801000000 - 2524608001000000 - microseconds
|
|
67
|
+
# 315532801000000000 - 2524608001000000000 - nanoseconds
|
|
68
|
+
if df[self.date_column].apply(lambda x: 10**16 < x).all():
|
|
69
|
+
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
|
|
70
|
+
elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
|
|
71
|
+
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
|
|
72
|
+
elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
|
|
73
|
+
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
|
|
74
|
+
elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
|
|
75
|
+
df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
|
|
76
|
+
else:
|
|
77
|
+
msg = f"Unsupported type of date column {self.date_column}. Convert to datetime please."
|
|
78
|
+
self.logger.warning(msg)
|
|
79
|
+
raise ValidationError(msg)
|
|
67
80
|
|
|
68
81
|
# If column with date is datetime then extract seconds of the day and minute of the hour
|
|
69
82
|
# as additional features
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
from typing import Dict, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from upgini.metadata import TARGET, ModelTaskType, SearchKey
|
|
7
|
+
from upgini.resource_bundle import bundle
|
|
8
|
+
from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
|
|
9
|
+
from upgini.utils.target_utils import define_task
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def remove_fintech_duplicates(df: pd.DataFrame,
                              search_keys: Dict[str, SearchKey],
                              logger: Optional[Logger] = None) -> pd.DataFrame:
    """Drop fintech-style label-noise duplicates from the training frame.

    Rows that share the same personal search keys (phone / email / HEM) but
    carry different target values within a 60-day window are treated as
    incorrect duplicates and removed. The check only runs for binary tasks
    and only when a date search key plus at least one personal key exist.

    Parameters
    ----------
    df : pd.DataFrame
        Training dataframe containing the TARGET column and the search-key columns.
    search_keys : Dict[str, SearchKey]
        Mapping of column name -> search key type.
    logger : Optional[Logger]
        If given, the removal warning is also written to this logger.

    Returns
    -------
    pd.DataFrame
        The (possibly filtered) dataframe. NOTE(review): when the 60-day check
        runs, the returned frame has its date column converted by
        DateTimeSearchKeyConverter even if no rows are removed — confirm
        callers expect that side effect.
    """
    # Fix: use df[TARGET] instead of the fragile attribute access `df.target`
    # (consistent with the rows[TARGET] usage below).
    if define_task(df[TARGET], silent=True) != ModelTaskType.BINARY:
        return df

    date_col = _get_column_by_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
    if date_col is None:
        return df

    # Collect all "personal" identifier columns present among the search keys.
    personal_cols = []
    for key in (SearchKey.PHONE, SearchKey.EMAIL, SearchKey.HEM):
        col = _get_column_by_key(search_keys, key)
        if col:
            personal_cols.append(col)
    if len(personal_cols) == 0:
        return df

    # No duplicated personal keys at all -> nothing to clean up.
    duplicates = df.duplicated(personal_cols, keep=False)
    duplicate_rows = df[duplicates]
    if len(duplicate_rows) == 0:
        return df

    grouped_by_personal_cols = df.groupby(personal_cols, group_keys=False)

    # If most (>= 60%) of the keys appear on several distinct dates the data
    # looks like a legitimate time series rather than noisy duplicate
    # applications, so keep it untouched.
    uniques = grouped_by_personal_cols[date_col].nunique()
    total = len(uniques)
    diff_dates = len(uniques[uniques > 1])
    if diff_dates / total >= 0.6:
        return df

    # All duplicates agree on the target -> they are harmless.
    # dropna=False preserves the original `len(x.unique()) == 1` semantics
    # (NaN counted as a distinct value).
    if (grouped_by_personal_cols[TARGET].nunique(dropna=False) == 1).all():
        return df

    def has_diff_target_within_60_days(rows):
        # Date column is expected to hold epoch milliseconds after the
        # conversion below; 60 * 24 * 60 * 60 * 1000 ms == 60 days.
        rows = rows.sort_values(by=date_col)
        return len(rows[rows[TARGET].ne(rows[TARGET].shift()) & (rows[date_col].diff() < 60 * 24 * 60 * 60 * 1000)]) > 0

    df = DateTimeSearchKeyConverter(date_col).convert(df)
    grouped_by_personal_cols = df.groupby(personal_cols, group_keys=False)
    rows_with_diff_target = grouped_by_personal_cols.filter(has_diff_target_within_60_days)
    if len(rows_with_diff_target) > 0:
        perc = len(rows_with_diff_target) * 100 / len(df)
        msg = bundle.get("dataset_diff_target_duplicates_fintech").format(perc, len(rows_with_diff_target), rows_with_diff_target.index.to_list())
        print(msg)
        if logger:
            logger.warning(msg)
        df = df[~df.index.isin(rows_with_diff_target.index)]

    return df
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _get_column_by_key(search_keys: Dict[str, SearchKey], keys: Union[SearchKey, List[SearchKey]]) -> Optional[str]:
    """Return the first column whose search-key type matches *keys*.

    *keys* may be a single SearchKey or a list of them; returns None when no
    column matches.
    """
    wanted = keys if isinstance(keys, list) else [keys]
    for column, key_type in search_keys.items():
        if key_type in wanted:
            return column
    return None
|
@@ -49,6 +49,7 @@ src/upgini/utils/country_utils.py
|
|
|
49
49
|
src/upgini/utils/custom_loss_utils.py
|
|
50
50
|
src/upgini/utils/cv_utils.py
|
|
51
51
|
src/upgini/utils/datetime_utils.py
|
|
52
|
+
src/upgini/utils/deduplicate_utils.py
|
|
52
53
|
src/upgini/utils/display_utils.py
|
|
53
54
|
src/upgini/utils/email_utils.py
|
|
54
55
|
src/upgini/utils/fallback_progress_bar.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|