upgini 1.1.253a5__py3-none-any.whl → 1.1.253a3261__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/data_source/data_source_publisher.py +1 -1
- upgini/dataset.py +57 -20
- upgini/features_enricher.py +15 -15
- upgini/search_task.py +1 -1
- upgini/utils/datetime_utils.py +1 -1
- upgini/utils/target_utils.py +2 -111
- {upgini-1.1.253a5.dist-info → upgini-1.1.253a3261.dist-info}/METADATA +3 -2
- {upgini-1.1.253a5.dist-info → upgini-1.1.253a3261.dist-info}/RECORD +11 -11
- {upgini-1.1.253a5.dist-info → upgini-1.1.253a3261.dist-info}/LICENSE +0 -0
- {upgini-1.1.253a5.dist-info → upgini-1.1.253a3261.dist-info}/WHEEL +0 -0
- {upgini-1.1.253a5.dist-info → upgini-1.1.253a3261.dist-info}/top_level.txt +0 -0
upgini/dataset.py
CHANGED
|
@@ -39,10 +39,10 @@ from upgini.metadata import (
|
|
|
39
39
|
)
|
|
40
40
|
from upgini.normalizer.phone_normalizer import PhoneNormalizer
|
|
41
41
|
from upgini.resource_bundle import ResourceBundle, get_custom_bundle
|
|
42
|
+
from upgini.sampler.random_under_sampler import RandomUnderSampler
|
|
42
43
|
from upgini.search_task import SearchTask
|
|
43
44
|
from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
|
|
44
45
|
from upgini.utils.email_utils import EmailSearchKeyConverter
|
|
45
|
-
from upgini.utils.target_utils import balance_undersample
|
|
46
46
|
|
|
47
47
|
try:
|
|
48
48
|
from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
|
|
@@ -61,8 +61,6 @@ class Dataset: # (pd.DataFrame):
|
|
|
61
61
|
FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
|
|
62
62
|
MIN_SAMPLE_THRESHOLD = 5_000
|
|
63
63
|
IMBALANCE_THESHOLD = 0.4
|
|
64
|
-
BINARY_BOOTSTRAP_LOOPS = 5
|
|
65
|
-
MULTICLASS_BOOTSTRAP_LOOPS = 2
|
|
66
64
|
MIN_TARGET_CLASS_ROWS = 100
|
|
67
65
|
MAX_MULTICLASS_CLASS_COUNT = 100
|
|
68
66
|
MIN_SUPPORTED_DATE_TS = 946684800000 # 2000-01-01
|
|
@@ -463,8 +461,10 @@ class Dataset: # (pd.DataFrame):
|
|
|
463
461
|
self.task_type == ModelTaskType.BINARY and len(train_segment) > self.MIN_SAMPLE_THRESHOLD
|
|
464
462
|
):
|
|
465
463
|
count = len(train_segment)
|
|
466
|
-
|
|
467
|
-
|
|
464
|
+
min_class_count = count
|
|
465
|
+
min_class_value = None
|
|
466
|
+
target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, "")
|
|
467
|
+
target = train_segment[target_column].copy()
|
|
468
468
|
target_classes_count = target.nunique()
|
|
469
469
|
|
|
470
470
|
if target_classes_count > self.MAX_MULTICLASS_CLASS_COUNT:
|
|
@@ -474,9 +474,12 @@ class Dataset: # (pd.DataFrame):
|
|
|
474
474
|
self.logger.warning(msg)
|
|
475
475
|
raise ValidationError(msg)
|
|
476
476
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
477
|
+
unique_target = target.unique()
|
|
478
|
+
for v in list(unique_target): # type: ignore
|
|
479
|
+
current_class_count = len(train_segment.loc[target == v])
|
|
480
|
+
if current_class_count < min_class_count:
|
|
481
|
+
min_class_count = current_class_count
|
|
482
|
+
min_class_value = v
|
|
480
483
|
|
|
481
484
|
if min_class_count < self.MIN_TARGET_CLASS_ROWS:
|
|
482
485
|
msg = self.bundle.get("dataset_rarest_class_less_min").format(
|
|
@@ -489,19 +492,53 @@ class Dataset: # (pd.DataFrame):
|
|
|
489
492
|
min_class_threshold = min_class_percent * count
|
|
490
493
|
|
|
491
494
|
if min_class_count < min_class_threshold:
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
df=train_segment,
|
|
495
|
-
target_column=target_column,
|
|
496
|
-
task_type=self.task_type,
|
|
497
|
-
random_state=self.random_state,
|
|
498
|
-
imbalance_threshold=self.IMBALANCE_THESHOLD,
|
|
499
|
-
binary_bootstrap_loops=self.BINARY_BOOTSTRAP_LOOPS,
|
|
500
|
-
multiclass_bootstrap_loops=self.MULTICLASS_BOOTSTRAP_LOOPS,
|
|
501
|
-
logger=self.logger,
|
|
502
|
-
bundle=self.bundle,
|
|
503
|
-
warning_counter=self.warning_counter,
|
|
495
|
+
msg = self.bundle.get("dataset_rarest_class_less_threshold").format(
|
|
496
|
+
min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
|
|
504
497
|
)
|
|
498
|
+
self.logger.warning(msg)
|
|
499
|
+
print(msg)
|
|
500
|
+
self.warning_counter.increment()
|
|
501
|
+
|
|
502
|
+
train_segment = train_segment.copy().sort_values(by=SYSTEM_RECORD_ID)
|
|
503
|
+
if self.task_type == ModelTaskType.MULTICLASS:
|
|
504
|
+
# Sort classes by rows count and find 25% quantile class
|
|
505
|
+
classes = target.value_counts().index
|
|
506
|
+
quantile25_idx = int(0.75 * len(classes))
|
|
507
|
+
quantile25_class = classes[quantile25_idx]
|
|
508
|
+
count_of_quantile25_class = len(target[target == quantile25_class])
|
|
509
|
+
msg = self.bundle.get("imbalance_multiclass").format(quantile25_class, count_of_quantile25_class)
|
|
510
|
+
self.logger.warning(msg)
|
|
511
|
+
print(msg)
|
|
512
|
+
# 25% and lower classes will stay as is. Higher classes will be downsampled
|
|
513
|
+
parts = []
|
|
514
|
+
for class_idx in range(quantile25_idx):
|
|
515
|
+
sampled = train_segment[train_segment[target_column] == classes[class_idx]].sample(
|
|
516
|
+
n=count_of_quantile25_class, random_state=self.random_state
|
|
517
|
+
)
|
|
518
|
+
parts.append(sampled)
|
|
519
|
+
for class_idx in range(quantile25_idx, len(classes)):
|
|
520
|
+
parts.append(train_segment[train_segment[target_column] == classes[class_idx]])
|
|
521
|
+
resampled_data = pd.concat(parts)
|
|
522
|
+
elif self.task_type == ModelTaskType.BINARY and min_class_count < self.MIN_SAMPLE_THRESHOLD / 2:
|
|
523
|
+
minority_class = train_segment[train_segment[target_column] == min_class_value]
|
|
524
|
+
majority_class = train_segment[train_segment[target_column] != min_class_value]
|
|
525
|
+
sampled_majority_class = majority_class.sample(
|
|
526
|
+
n=self.MIN_SAMPLE_THRESHOLD - min_class_count, random_state=self.random_state
|
|
527
|
+
)
|
|
528
|
+
resampled_data = train_segment[
|
|
529
|
+
(train_segment[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
|
|
530
|
+
| (train_segment[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
|
|
531
|
+
]
|
|
532
|
+
else:
|
|
533
|
+
sampler = RandomUnderSampler(random_state=self.random_state)
|
|
534
|
+
X = train_segment[SYSTEM_RECORD_ID]
|
|
535
|
+
X = X.to_frame(SYSTEM_RECORD_ID)
|
|
536
|
+
new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
537
|
+
resampled_data = train_segment[train_segment[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
|
|
538
|
+
|
|
539
|
+
self.data = resampled_data
|
|
540
|
+
self.logger.info(f"Shape after rebalance resampling: {self.data.shape}")
|
|
541
|
+
self.imbalanced = True
|
|
505
542
|
|
|
506
543
|
# Resample over fit threshold
|
|
507
544
|
if not self.imbalanced and EVAL_SET_INDEX in self.data.columns:
|
upgini/features_enricher.py
CHANGED
|
@@ -221,7 +221,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
221
221
|
if logs_enabled:
|
|
222
222
|
self.logger = LoggerFactory().get_logger(endpoint, self._api_key, client_ip, client_visitorid)
|
|
223
223
|
else:
|
|
224
|
-
self.logger = logging.getLogger(
|
|
224
|
+
self.logger = logging.getLogger()
|
|
225
225
|
self.logger.setLevel("FATAL")
|
|
226
226
|
|
|
227
227
|
if len(kwargs) > 0:
|
|
@@ -3047,6 +3047,20 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3047
3047
|
def list_or_single(lst: List[str], single: str):
|
|
3048
3048
|
return lst or ([single] if single else [])
|
|
3049
3049
|
|
|
3050
|
+
def to_anchor(link: str, value: str) -> str:
|
|
3051
|
+
if not value:
|
|
3052
|
+
return ""
|
|
3053
|
+
elif not link:
|
|
3054
|
+
return value
|
|
3055
|
+
elif value == llm_source:
|
|
3056
|
+
return value
|
|
3057
|
+
else:
|
|
3058
|
+
return f"<a href='{link}' target='_blank' rel='noopener noreferrer'>{value}</a>"
|
|
3059
|
+
|
|
3060
|
+
def make_links(names: List[str], links: List[str]):
|
|
3061
|
+
all_links = [to_anchor(link, name) for name, link in itertools.zip_longest(names, links)]
|
|
3062
|
+
return ",".join(all_links)
|
|
3063
|
+
|
|
3050
3064
|
features_meta.sort(key=lambda m: (-m.shap_value, m.name))
|
|
3051
3065
|
for feature_meta in features_meta:
|
|
3052
3066
|
if feature_meta.name in original_names_dict.keys():
|
|
@@ -3072,20 +3086,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3072
3086
|
if len(feature_sample) > 30:
|
|
3073
3087
|
feature_sample = feature_sample[:30] + "..."
|
|
3074
3088
|
|
|
3075
|
-
def to_anchor(link: str, value: str) -> str:
|
|
3076
|
-
if not value:
|
|
3077
|
-
return ""
|
|
3078
|
-
elif not link:
|
|
3079
|
-
return value
|
|
3080
|
-
elif value == llm_source:
|
|
3081
|
-
return value
|
|
3082
|
-
else:
|
|
3083
|
-
return f"<a href='{link}' target='_blank' rel='noopener noreferrer'>{value}</a>"
|
|
3084
|
-
|
|
3085
|
-
def make_links(names: List[str], links: List[str]):
|
|
3086
|
-
all_links = [to_anchor(link, name) for name, link in itertools.zip_longest(names, links)]
|
|
3087
|
-
return ",".join(all_links)
|
|
3088
|
-
|
|
3089
3089
|
internal_provider = feature_meta.data_provider or "Upgini"
|
|
3090
3090
|
providers = list_or_single(feature_meta.data_providers, feature_meta.data_provider)
|
|
3091
3091
|
provider_links = list_or_single(feature_meta.data_provider_links, feature_meta.data_provider_link)
|
upgini/search_task.py
CHANGED
|
@@ -57,7 +57,7 @@ class SearchTask:
|
|
|
57
57
|
if logger is not None:
|
|
58
58
|
self.logger = logger
|
|
59
59
|
else:
|
|
60
|
-
self.logger = logging.getLogger(
|
|
60
|
+
self.logger = logging.getLogger()
|
|
61
61
|
self.logger.setLevel("FATAL")
|
|
62
62
|
self.provider_metadata_v2: Optional[List[ProviderTaskMetadataV2]] = None
|
|
63
63
|
self.unused_features_for_generation: Optional[List[str]] = None
|
upgini/utils/datetime_utils.py
CHANGED
|
@@ -31,7 +31,7 @@ class DateTimeSearchKeyConverter:
|
|
|
31
31
|
if logger is not None:
|
|
32
32
|
self.logger = logger
|
|
33
33
|
else:
|
|
34
|
-
self.logger = logging.getLogger(
|
|
34
|
+
self.logger = logging.getLogger()
|
|
35
35
|
self.logger.setLevel("FATAL")
|
|
36
36
|
self.generated_features: List[str] = []
|
|
37
37
|
self.bundle = bundle or get_custom_bundle()
|
upgini/utils/target_utils.py
CHANGED
|
@@ -6,10 +6,8 @@ import pandas as pd
|
|
|
6
6
|
from pandas.api.types import is_numeric_dtype
|
|
7
7
|
|
|
8
8
|
from upgini.errors import ValidationError
|
|
9
|
-
from upgini.metadata import
|
|
10
|
-
from upgini.resource_bundle import
|
|
11
|
-
from upgini.sampler.random_under_sampler import RandomUnderSampler
|
|
12
|
-
from upgini.utils.warning_counter import WarningCounter
|
|
9
|
+
from upgini.metadata import ModelTaskType
|
|
10
|
+
from upgini.resource_bundle import bundle
|
|
13
11
|
|
|
14
12
|
|
|
15
13
|
def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
|
|
@@ -74,110 +72,3 @@ def is_int_encoding(unique_values):
|
|
|
74
72
|
return set(unique_values) == set(range(len(unique_values))) or set(unique_values) == set(
|
|
75
73
|
range(1, len(unique_values) + 1)
|
|
76
74
|
)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
def balance_undersample(
|
|
80
|
-
df: pd.DataFrame,
|
|
81
|
-
target_column: str,
|
|
82
|
-
task_type: ModelTaskType,
|
|
83
|
-
random_state: int,
|
|
84
|
-
imbalance_threshold: int = 0.2,
|
|
85
|
-
min_sample_threshold: int = 5000,
|
|
86
|
-
binary_bootstrap_loops: int = 5,
|
|
87
|
-
multiclass_bootstrap_loops: int = 2,
|
|
88
|
-
logger: Optional[logging.Logger] = None,
|
|
89
|
-
bundle: Optional[ResourceBundle] = None,
|
|
90
|
-
warning_counter: Optional[WarningCounter] = None,
|
|
91
|
-
) -> pd.DataFrame:
|
|
92
|
-
if logger is None:
|
|
93
|
-
logger = logging.getLogger("muted_logger")
|
|
94
|
-
logger.setLevel("FATAL")
|
|
95
|
-
bundle = bundle or get_custom_bundle()
|
|
96
|
-
if SYSTEM_RECORD_ID not in df.columns:
|
|
97
|
-
raise Exception("System record id must be presented for undersampling")
|
|
98
|
-
|
|
99
|
-
count = len(df)
|
|
100
|
-
target = df[target_column].copy()
|
|
101
|
-
target_classes_count = target.nunique()
|
|
102
|
-
|
|
103
|
-
vc = target.value_counts()
|
|
104
|
-
max_class_value = vc.index[0]
|
|
105
|
-
min_class_value = vc.index[len(vc) - 1]
|
|
106
|
-
max_class_count = vc[max_class_value]
|
|
107
|
-
min_class_count = vc[min_class_value]
|
|
108
|
-
|
|
109
|
-
min_class_percent = imbalance_threshold / target_classes_count
|
|
110
|
-
min_class_threshold = min_class_percent * count
|
|
111
|
-
|
|
112
|
-
resampled_data = df
|
|
113
|
-
df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
|
|
114
|
-
if task_type == ModelTaskType.MULTICLASS:
|
|
115
|
-
# Sort classes by rows count and find 25% quantile class
|
|
116
|
-
classes = vc.index
|
|
117
|
-
quantile25_idx = int(0.75 * len(classes)) - 1
|
|
118
|
-
quantile25_class = classes[quantile25_idx]
|
|
119
|
-
quantile25_class_cnt = vc[quantile25_class]
|
|
120
|
-
|
|
121
|
-
if max_class_count > (quantile25_class_cnt * multiclass_bootstrap_loops):
|
|
122
|
-
msg = bundle.get("imbalance_multiclass").format(quantile25_class, quantile25_class_cnt)
|
|
123
|
-
logger.warning(msg)
|
|
124
|
-
print(msg)
|
|
125
|
-
if warning_counter:
|
|
126
|
-
warning_counter.increment()
|
|
127
|
-
|
|
128
|
-
# 25% and lower classes will stay as is. Higher classes will be downsampled
|
|
129
|
-
sample_strategy = dict()
|
|
130
|
-
for class_idx in range(quantile25_idx):
|
|
131
|
-
# compare class count with count_of_quantile25_class * 2
|
|
132
|
-
class_value = classes[class_idx]
|
|
133
|
-
class_count = vc[class_value]
|
|
134
|
-
sample_strategy[class_value] = min(class_count, quantile25_class_cnt * multiclass_bootstrap_loops)
|
|
135
|
-
sampler = RandomUnderSampler(
|
|
136
|
-
sampling_strategy=sample_strategy, random_state=random_state
|
|
137
|
-
)
|
|
138
|
-
X = df[SYSTEM_RECORD_ID]
|
|
139
|
-
X = X.to_frame(SYSTEM_RECORD_ID)
|
|
140
|
-
new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
141
|
-
|
|
142
|
-
resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
|
|
143
|
-
elif len(df) > min_sample_threshold and min_class_count < min_sample_threshold / 2:
|
|
144
|
-
msg = bundle.get("dataset_rarest_class_less_threshold").format(
|
|
145
|
-
min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
|
|
146
|
-
)
|
|
147
|
-
logger.warning(msg)
|
|
148
|
-
print(msg)
|
|
149
|
-
if warning_counter:
|
|
150
|
-
warning_counter.increment()
|
|
151
|
-
|
|
152
|
-
# fill up to min_sample_threshold by majority class
|
|
153
|
-
minority_class = df[df[target_column] == min_class_value]
|
|
154
|
-
majority_class = df[df[target_column] != min_class_value]
|
|
155
|
-
sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
|
|
156
|
-
sampled_majority_class = majority_class.sample(
|
|
157
|
-
n=sample_size, random_state=random_state
|
|
158
|
-
)
|
|
159
|
-
resampled_data = df[
|
|
160
|
-
(df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
|
|
161
|
-
| (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
|
|
162
|
-
]
|
|
163
|
-
|
|
164
|
-
elif max_class_count > min_class_count * binary_bootstrap_loops:
|
|
165
|
-
msg = bundle.get("dataset_rarest_class_less_threshold").format(
|
|
166
|
-
min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
|
|
167
|
-
)
|
|
168
|
-
logger.warning(msg)
|
|
169
|
-
print(msg)
|
|
170
|
-
if warning_counter:
|
|
171
|
-
warning_counter.increment()
|
|
172
|
-
|
|
173
|
-
sampler = RandomUnderSampler(
|
|
174
|
-
sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
|
|
175
|
-
)
|
|
176
|
-
X = df[SYSTEM_RECORD_ID]
|
|
177
|
-
X = X.to_frame(SYSTEM_RECORD_ID)
|
|
178
|
-
new_x, _ = sampler.fit_resample(X, target) # type: ignore
|
|
179
|
-
|
|
180
|
-
resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
|
|
181
|
-
|
|
182
|
-
logger.info(f"Shape after rebalance resampling: {resampled_data}")
|
|
183
|
-
return resampled_data
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.253a3261
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Home-page: https://upgini.com/
|
|
6
6
|
Author: Upgini Developers
|
|
@@ -18,12 +18,13 @@ Classifier: Intended Audience :: Science/Research
|
|
|
18
18
|
Classifier: Intended Audience :: Telecommunications Industry
|
|
19
19
|
Classifier: License :: OSI Approved :: BSD License
|
|
20
20
|
Classifier: Operating System :: OS Independent
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
21
22
|
Classifier: Programming Language :: Python :: 3.8
|
|
22
23
|
Classifier: Programming Language :: Python :: 3.9
|
|
23
24
|
Classifier: Programming Language :: Python :: 3.10
|
|
24
25
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
26
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
-
Requires-Python: >=3.
|
|
27
|
+
Requires-Python: >=3.7,<3.11
|
|
27
28
|
Description-Content-Type: text/markdown
|
|
28
29
|
License-File: LICENSE
|
|
29
30
|
Requires-Dist: python-dateutil >=2.8.0
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=tLa0aEcT7XwVJz1AawXCIEj3vxsSBi-geKicuYpRIMw,48196
|
|
4
4
|
upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=dP6Oyhi4erESEGlVFA_j67lqhNqNvbkfM4FGpE_WaTU,171760
|
|
6
6
|
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
7
7
|
upgini/http.py,sha256=eSG4gOpmCGlXmB6KIPNzAG8tRZNUjyYpMeUeHw_2li4,42264
|
|
8
8
|
upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
|
|
9
9
|
upgini/metrics.py,sha256=LS2MgEKgmn9VEXsKzxv3pBZ-q71mTnpWu6vL8fYgpo4,26727
|
|
10
|
-
upgini/search_task.py,sha256=
|
|
10
|
+
upgini/search_task.py,sha256=5n4qGJmtu48s0-FHAtF3L5qVLMd1JVW3FJlM8dFbh-s,17063
|
|
11
11
|
upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
|
|
12
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
13
13
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
|
@@ -21,7 +21,7 @@ upgini/autofe/operand.py,sha256=Rhy7Ky3we-I1Su1--dS4xdsO3K8neV4rqM_Q4xYE4ug,2779
|
|
|
21
21
|
upgini/autofe/unary.py,sha256=gyMkrx9bfa3o19zS-4JaRlScHrfeZGBsYe7d_6ePT-0,2853
|
|
22
22
|
upgini/autofe/vector.py,sha256=Qk7VmdwURNwVw7fIMEspWEo7HTiyUWCYIqu3hcWQQio,507
|
|
23
23
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
upgini/data_source/data_source_publisher.py,sha256=
|
|
24
|
+
upgini/data_source/data_source_publisher.py,sha256=ZMNyh1x1S3QkXkA-PTtBQ-sbOiANtNioEQs8VoQ24Lk,15110
|
|
25
25
|
upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
|
|
26
26
|
upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
|
|
27
27
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -40,7 +40,7 @@ upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6P
|
|
|
40
40
|
upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
|
|
41
41
|
upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
|
|
42
42
|
upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
|
|
43
|
-
upgini/utils/datetime_utils.py,sha256=
|
|
43
|
+
upgini/utils/datetime_utils.py,sha256=P5no4mFgYpEP6oY524ebTKvKc3TBMJzAYpWdj210_Fw,8699
|
|
44
44
|
upgini/utils/deduplicate_utils.py,sha256=ckJrpU8Ruc_vcwIPTopbUjyJuNiseLHNAbQlLfhUCxo,5888
|
|
45
45
|
upgini/utils/display_utils.py,sha256=tiq5sFOfMwkKCjQ7OGdyK_twe0Qdr9F3mzkW1QXSDog,10664
|
|
46
46
|
upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
|
|
@@ -52,11 +52,11 @@ upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,4
|
|
|
52
52
|
upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
|
|
53
53
|
upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
|
|
54
54
|
upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
|
|
55
|
-
upgini/utils/target_utils.py,sha256=
|
|
55
|
+
upgini/utils/target_utils.py,sha256=DH812qcZ7Pvf9WVVb33fbwQjb1W9h1hXRNCCiG7Y6tI,2563
|
|
56
56
|
upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
|
|
57
57
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
58
|
-
upgini-1.1.
|
|
59
|
-
upgini-1.1.
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
58
|
+
upgini-1.1.253a3261.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
59
|
+
upgini-1.1.253a3261.dist-info/METADATA,sha256=rriUm7SPiC9cSL8HPrx8AZ4zVyiOdlHMw5V6rGRy2Bk,48211
|
|
60
|
+
upgini-1.1.253a3261.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
61
|
+
upgini-1.1.253a3261.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
62
|
+
upgini-1.1.253a3261.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|