upgini 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +25 -13
- upgini/metrics.py +1 -1
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev14.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev14.dist-info}/RECORD +7 -7
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev14.dist-info}/WHEEL +0 -0
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev14.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev13"
|
1
|
+
__version__ = "1.2.81a3832.dev14"
|
upgini/features_enricher.py
CHANGED
@@ -1053,8 +1053,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
1053
1053
|
|
1054
1054
|
# 1 If client features are presented - fit and predict with KFold estimator
|
1055
1055
|
# on etalon features and calculate baseline metric
|
1056
|
-
|
1056
|
+
baseline_metric = None
|
1057
1057
|
baseline_estimator = None
|
1058
|
+
updating_shaps = None
|
1058
1059
|
custom_loss_add_params = get_additional_params_custom_loss(
|
1059
1060
|
self.loss, model_task_type, logger=self.logger
|
1060
1061
|
)
|
@@ -1074,17 +1075,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
1074
1075
|
text_features=text_features,
|
1075
1076
|
has_date=has_date,
|
1076
1077
|
)
|
1077
|
-
|
1078
|
+
baseline_cv_result = baseline_estimator.cross_val_predict(
|
1078
1079
|
fitting_X, y_sorted, baseline_score_column
|
1079
1080
|
)
|
1080
|
-
|
1081
|
-
if
|
1081
|
+
baseline_metric = baseline_cv_result.get_display_metric()
|
1082
|
+
if baseline_metric is None:
|
1082
1083
|
self.logger.info(
|
1083
1084
|
f"Baseline {metric} on train client features is None (maybe all features was removed)"
|
1084
1085
|
)
|
1085
1086
|
baseline_estimator = None
|
1086
1087
|
else:
|
1087
|
-
self.logger.info(f"Baseline {metric} on train client features: {
|
1088
|
+
self.logger.info(f"Baseline {metric} on train client features: {baseline_metric}")
|
1089
|
+
updating_shaps = baseline_cv_result.shap_values
|
1088
1090
|
|
1089
1091
|
# 2 Fit and predict with KFold estimator on enriched tds
|
1090
1092
|
# and calculate final metric (and uplift)
|
@@ -1110,10 +1112,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1110
1112
|
)
|
1111
1113
|
enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
1112
1114
|
enriched_metric = enriched_cv_result.get_display_metric()
|
1113
|
-
|
1114
|
-
|
1115
|
-
if enriched_shaps is not None:
|
1116
|
-
self._update_shap_values(trace_id, fitting_X, enriched_shaps, silent=not internal_call)
|
1115
|
+
updating_shaps = enriched_cv_result.shap_values
|
1117
1116
|
|
1118
1117
|
if enriched_metric is None:
|
1119
1118
|
self.logger.warning(
|
@@ -1122,8 +1121,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
1122
1121
|
enriched_estimator = None
|
1123
1122
|
else:
|
1124
1123
|
self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
|
1125
|
-
if
|
1126
|
-
uplift = (enriched_cv_result.metric -
|
1124
|
+
if baseline_metric is not None and enriched_metric is not None:
|
1125
|
+
uplift = (enriched_cv_result.metric - baseline_cv_result.metric) * multiplier
|
1127
1126
|
|
1128
1127
|
train_metrics = {
|
1129
1128
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
@@ -1141,8 +1140,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
1141
1140
|
np.mean(y_sorted),
|
1142
1141
|
4,
|
1143
1142
|
)
|
1144
|
-
if
|
1145
|
-
train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] =
|
1143
|
+
if baseline_metric is not None:
|
1144
|
+
train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
|
1145
|
+
baseline_metric
|
1146
|
+
)
|
1146
1147
|
if enriched_metric is not None:
|
1147
1148
|
train_metrics[self.bundle.get("quality_metrics_enriched_header").format(metric)] = (
|
1148
1149
|
enriched_metric
|
@@ -1233,6 +1234,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
1233
1234
|
|
1234
1235
|
metrics.append(eval_metrics)
|
1235
1236
|
|
1237
|
+
if updating_shaps is not None:
|
1238
|
+
self._update_shap_values(trace_id, fitting_X, updating_shaps, silent=not internal_call)
|
1239
|
+
|
1236
1240
|
metrics_df = pd.DataFrame(metrics)
|
1237
1241
|
mean_target_hdr = self.bundle.get("quality_metrics_mean_target_header")
|
1238
1242
|
if mean_target_hdr in metrics_df.columns:
|
@@ -1283,6 +1287,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1283
1287
|
|
1284
1288
|
def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
|
1285
1289
|
renaming = self.fit_columns_renaming or {}
|
1290
|
+
self.logger.info(f"Updating SHAP values: {new_shaps}")
|
1286
1291
|
new_shaps = {
|
1287
1292
|
renaming.get(feature, feature): _round_shap_value(shap)
|
1288
1293
|
for feature, shap in new_shaps.items()
|
@@ -1909,6 +1914,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
1909
1914
|
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
1910
1915
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
1911
1916
|
|
1917
|
+
reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
1918
|
+
X_sampled.rename(columns=reversed_renaming, inplace=True)
|
1919
|
+
enriched_X.rename(columns=reversed_renaming, inplace=True)
|
1920
|
+
for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
|
1921
|
+
eval_X_sampled.rename(columns=reversed_renaming, inplace=True)
|
1922
|
+
enriched_eval_X.rename(columns=reversed_renaming, inplace=True)
|
1923
|
+
|
1912
1924
|
datasets_hash = hash_input(self.X, self.y, self.eval_set)
|
1913
1925
|
return self.__cache_and_return_results(
|
1914
1926
|
datasets_hash,
|
upgini/metrics.py
CHANGED
@@ -412,7 +412,7 @@ class EstimatorWrapper:
|
|
412
412
|
self.logger.info(f"Convert bool feature {c} to int64")
|
413
413
|
x[c] = x[c].astype(np.int64)
|
414
414
|
self.converted_to_int.append(c)
|
415
|
-
elif not is_valid_numeric_array_data(x[c]):
|
415
|
+
elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
|
416
416
|
try:
|
417
417
|
x[c] = pd.to_numeric(x[c], errors="raise")
|
418
418
|
self.converted_to_numeric.append(c)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev13
|
3
|
+
Version: 1.2.81a3832.dev14
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=CxmaKb_KuIdKjpk8WSH1xoBLAKkBn_YYUK1Oy4B1YPo,34
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=MvkF7QFEFJnFSgCmc9R9OAw0sEzNXJOP45KfCvT7LBg,212399
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=rmC3xCLOr1TF-nUiL7pHnECp137GZs1Qa7bNDsOJoLM,43404
|
10
10
|
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.81a3832.
|
74
|
-
upgini-1.2.81a3832.
|
75
|
-
upgini-1.2.81a3832.
|
76
|
-
upgini-1.2.81a3832.
|
73
|
+
upgini-1.2.81a3832.dev14.dist-info/METADATA,sha256=Oo42vaGU2xYm37PAI1EB1n10GT_w7o8l4O5p0dYWmQE,49173
|
74
|
+
upgini-1.2.81a3832.dev14.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81a3832.dev14.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3832.dev14.dist-info/RECORD,,
|
File without changes
|
File without changes
|