upgini 1.2.42__tar.gz → 1.2.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.42 → upgini-1.2.44}/PKG-INFO +1 -1
- upgini-1.2.44/src/upgini/__about__.py +1 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/features_enricher.py +21 -11
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/datetime_utils.py +6 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/version_validator.py +6 -7
- upgini-1.2.42/src/upgini/__about__.py +0 -1
- {upgini-1.2.42 → upgini-1.2.44}/.gitignore +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/LICENSE +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/README.md +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/pyproject.toml +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/ads.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/dataset.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/errors.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/http.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/metadata.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/metrics.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/search_task.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/spinner.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.42 → upgini-1.2.44}/src/upgini/utils/warning_counter.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.44"
|
|
@@ -277,7 +277,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
277
277
|
dict()
|
|
278
278
|
)
|
|
279
279
|
|
|
280
|
-
validate_version(self.logger
|
|
280
|
+
validate_version(self.logger)
|
|
281
281
|
|
|
282
282
|
self.search_keys = search_keys or {}
|
|
283
283
|
self.id_columns = id_columns
|
|
@@ -584,6 +584,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
584
584
|
Transformed dataframe, enriched with valuable features.
|
|
585
585
|
"""
|
|
586
586
|
|
|
587
|
+
self.warning_counter.reset()
|
|
587
588
|
trace_id = str(uuid.uuid4())
|
|
588
589
|
start_time = time.time()
|
|
589
590
|
with MDC(trace_id=trace_id):
|
|
@@ -720,7 +721,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
720
721
|
X_new: pandas.DataFrame of shape (n_samples, n_features_new)
|
|
721
722
|
Transformed dataframe, enriched with valuable features.
|
|
722
723
|
"""
|
|
723
|
-
|
|
724
|
+
self.warning_counter.reset()
|
|
724
725
|
search_progress = SearchProgress(0.0, ProgressStage.START_TRANSFORM)
|
|
725
726
|
if progress_callback is not None:
|
|
726
727
|
progress_callback(search_progress)
|
|
@@ -1086,13 +1087,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1086
1087
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
|
1087
1088
|
"quality_metrics_train_segment"
|
|
1088
1089
|
),
|
|
1089
|
-
self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
|
|
1090
|
+
# self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
|
|
1091
|
+
# Show the size of the dataset actually used for computing metrics
|
|
1092
|
+
self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
|
|
1090
1093
|
}
|
|
1091
1094
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
|
1092
1095
|
validated_y
|
|
1093
1096
|
):
|
|
1094
1097
|
train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
|
1095
|
-
np.mean(validated_y), 4
|
|
1098
|
+
# np.mean(validated_y), 4
|
|
1099
|
+
np.mean(y_sorted), 4
|
|
1096
1100
|
)
|
|
1097
1101
|
if etalon_metric is not None:
|
|
1098
1102
|
train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
|
|
@@ -1153,13 +1157,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1153
1157
|
else:
|
|
1154
1158
|
eval_uplift = None
|
|
1155
1159
|
|
|
1156
|
-
# effective_eval_set = eval_set if eval_set is not None else self.eval_set
|
|
1157
1160
|
eval_metrics = {
|
|
1158
1161
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
|
1159
1162
|
"quality_metrics_eval_segment"
|
|
1160
1163
|
).format(idx + 1),
|
|
1161
1164
|
self.bundle.get("quality_metrics_rows_header"): _num_samples(
|
|
1162
|
-
effective_eval_set[idx][0]
|
|
1165
|
+
# effective_eval_set[idx][0]
|
|
1166
|
+
# Use the dataset that was actually used for computing metrics
|
|
1167
|
+
eval_X_sorted
|
|
1163
1168
|
),
|
|
1164
1169
|
# self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
|
|
1165
1170
|
}
|
|
@@ -1167,7 +1172,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1167
1172
|
validated_eval_set[idx][1]
|
|
1168
1173
|
):
|
|
1169
1174
|
eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
|
1170
|
-
np.mean(validated_eval_set[idx][1]), 4
|
|
1175
|
+
# np.mean(validated_eval_set[idx][1]), 4
|
|
1176
|
+
# Use the target values that were actually used for computing metrics
|
|
1177
|
+
np.mean(eval_y_sorted), 4
|
|
1171
1178
|
)
|
|
1172
1179
|
if etalon_eval_metric is not None:
|
|
1173
1180
|
eval_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
|
|
@@ -2527,11 +2534,9 @@ if response.status_code == 200:
|
|
|
2527
2534
|
def __is_registered(self) -> bool:
|
|
2528
2535
|
return self.api_key is not None and self.api_key != ""
|
|
2529
2536
|
|
|
2530
|
-
def __log_warning(self, message: str, show_support_link: bool = False
|
|
2537
|
+
def __log_warning(self, message: str, show_support_link: bool = False):
|
|
2531
2538
|
warning_num = self.warning_counter.increment()
|
|
2532
2539
|
formatted_message = f"WARNING #{warning_num}: {message}\n"
|
|
2533
|
-
if is_red:
|
|
2534
|
-
formatted_message = Format.RED + formatted_message + Format.END
|
|
2535
2540
|
if show_support_link:
|
|
2536
2541
|
self.__display_support_link(formatted_message)
|
|
2537
2542
|
else:
|
|
@@ -2584,7 +2589,12 @@ if response.status_code == 200:
|
|
|
2584
2589
|
checked_generate_features = []
|
|
2585
2590
|
for gen_feature in self.generate_features:
|
|
2586
2591
|
if gen_feature not in x_columns:
|
|
2587
|
-
self.
|
|
2592
|
+
if gen_feature == self._get_phone_column(self.search_keys):
|
|
2593
|
+
raise ValidationError(
|
|
2594
|
+
self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
|
|
2595
|
+
)
|
|
2596
|
+
else:
|
|
2597
|
+
self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
|
|
2588
2598
|
else:
|
|
2589
2599
|
checked_generate_features.append(gen_feature)
|
|
2590
2600
|
self.generate_features = checked_generate_features
|
|
@@ -167,6 +167,12 @@ class DateTimeSearchKeyConverter:
|
|
|
167
167
|
# Drop intermediate columns if not needed
|
|
168
168
|
df.drop(columns=["second", "minute", "hour"], inplace=True)
|
|
169
169
|
|
|
170
|
+
for generated_feature in self.generated_features[:]:
|
|
171
|
+
if df[generated_feature].dropna().nunique() <= 1:
|
|
172
|
+
self.logger.warning(f"Generated constant feature {generated_feature} will be dropped")
|
|
173
|
+
df.drop(columns=generated_feature, inplace=True)
|
|
174
|
+
self.generated_features.remove(generated_feature)
|
|
175
|
+
|
|
170
176
|
df.drop(columns=seconds, inplace=True)
|
|
171
177
|
|
|
172
178
|
if keep_time:
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import threading
|
|
3
|
-
from typing import Callable, Optional
|
|
4
3
|
|
|
5
4
|
import requests
|
|
6
5
|
|
|
6
|
+
from upgini.utils.format import Format
|
|
7
|
+
|
|
7
8
|
try:
|
|
8
9
|
from packaging.version import parse
|
|
9
10
|
except ImportError:
|
|
@@ -31,18 +32,16 @@ def get_version(package, url_pattern=URL_PATTERN):
|
|
|
31
32
|
return version
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
def validate_version(logger: logging.Logger
|
|
35
|
+
def validate_version(logger: logging.Logger):
|
|
35
36
|
def task():
|
|
36
37
|
try:
|
|
37
38
|
current_version = parse(__version__)
|
|
38
39
|
latest_version = get_version("upgini")
|
|
39
40
|
if current_version < latest_version:
|
|
40
41
|
msg = bundle.get("version_warning").format(current_version, latest_version)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
logger.warning(msg)
|
|
45
|
-
print(msg)
|
|
42
|
+
formatted_message = Format.RED + msg + Format.END
|
|
43
|
+
logger.warning(msg)
|
|
44
|
+
print(formatted_message)
|
|
46
45
|
except Exception:
|
|
47
46
|
logger.warning("Failed to validate version", exc_info=True)
|
|
48
47
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.42"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|