upgini 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +29 -14
- upgini/metrics.py +1 -10
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/RECORD +7 -7
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/WHEEL +0 -0
- {upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev13"
|
1
|
+
__version__ = "1.2.81a3832.dev15"
|
upgini/features_enricher.py
CHANGED
@@ -1053,8 +1053,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
1053
1053
|
|
1054
1054
|
# 1 If client features are presented - fit and predict with KFold estimator
|
1055
1055
|
# on etalon features and calculate baseline metric
|
1056
|
-
|
1056
|
+
baseline_metric = None
|
1057
1057
|
baseline_estimator = None
|
1058
|
+
updating_shaps = None
|
1058
1059
|
custom_loss_add_params = get_additional_params_custom_loss(
|
1059
1060
|
self.loss, model_task_type, logger=self.logger
|
1060
1061
|
)
|
@@ -1074,17 +1075,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
1074
1075
|
text_features=text_features,
|
1075
1076
|
has_date=has_date,
|
1076
1077
|
)
|
1077
|
-
|
1078
|
+
baseline_cv_result = baseline_estimator.cross_val_predict(
|
1078
1079
|
fitting_X, y_sorted, baseline_score_column
|
1079
1080
|
)
|
1080
|
-
|
1081
|
-
if
|
1081
|
+
baseline_metric = baseline_cv_result.get_display_metric()
|
1082
|
+
if baseline_metric is None:
|
1082
1083
|
self.logger.info(
|
1083
1084
|
f"Baseline {metric} on train client features is None (maybe all features was removed)"
|
1084
1085
|
)
|
1085
1086
|
baseline_estimator = None
|
1086
1087
|
else:
|
1087
|
-
self.logger.info(f"Baseline {metric} on train client features: {
|
1088
|
+
self.logger.info(f"Baseline {metric} on train client features: {baseline_metric}")
|
1089
|
+
updating_shaps = baseline_cv_result.shap_values
|
1088
1090
|
|
1089
1091
|
# 2 Fit and predict with KFold estimator on enriched tds
|
1090
1092
|
# and calculate final metric (and uplift)
|
@@ -1110,10 +1112,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1110
1112
|
)
|
1111
1113
|
enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
1112
1114
|
enriched_metric = enriched_cv_result.get_display_metric()
|
1113
|
-
|
1114
|
-
|
1115
|
-
if enriched_shaps is not None:
|
1116
|
-
self._update_shap_values(trace_id, fitting_X, enriched_shaps, silent=not internal_call)
|
1115
|
+
updating_shaps = enriched_cv_result.shap_values
|
1117
1116
|
|
1118
1117
|
if enriched_metric is None:
|
1119
1118
|
self.logger.warning(
|
@@ -1122,8 +1121,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
1122
1121
|
enriched_estimator = None
|
1123
1122
|
else:
|
1124
1123
|
self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
|
1125
|
-
if
|
1126
|
-
uplift = (enriched_cv_result.metric -
|
1124
|
+
if baseline_metric is not None and enriched_metric is not None:
|
1125
|
+
uplift = (enriched_cv_result.metric - baseline_cv_result.metric) * multiplier
|
1127
1126
|
|
1128
1127
|
train_metrics = {
|
1129
1128
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
@@ -1141,8 +1140,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
1141
1140
|
np.mean(y_sorted),
|
1142
1141
|
4,
|
1143
1142
|
)
|
1144
|
-
if
|
1145
|
-
train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] =
|
1143
|
+
if baseline_metric is not None:
|
1144
|
+
train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
|
1145
|
+
baseline_metric
|
1146
|
+
)
|
1146
1147
|
if enriched_metric is not None:
|
1147
1148
|
train_metrics[self.bundle.get("quality_metrics_enriched_header").format(metric)] = (
|
1148
1149
|
enriched_metric
|
@@ -1233,6 +1234,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
1233
1234
|
|
1234
1235
|
metrics.append(eval_metrics)
|
1235
1236
|
|
1237
|
+
if updating_shaps is not None:
|
1238
|
+
self._update_shap_values(trace_id, fitting_X, updating_shaps, silent=not internal_call)
|
1239
|
+
|
1236
1240
|
metrics_df = pd.DataFrame(metrics)
|
1237
1241
|
mean_target_hdr = self.bundle.get("quality_metrics_mean_target_header")
|
1238
1242
|
if mean_target_hdr in metrics_df.columns:
|
@@ -1283,6 +1287,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1283
1287
|
|
1284
1288
|
def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
|
1285
1289
|
renaming = self.fit_columns_renaming or {}
|
1290
|
+
self.logger.info(f"Updating SHAP values: {new_shaps}")
|
1286
1291
|
new_shaps = {
|
1287
1292
|
renaming.get(feature, feature): _round_shap_value(shap)
|
1288
1293
|
for feature, shap in new_shaps.items()
|
@@ -1909,6 +1914,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
1909
1914
|
enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
|
1910
1915
|
eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
|
1911
1916
|
|
1917
|
+
reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
|
1918
|
+
X_sampled.rename(columns=reversed_renaming, inplace=True)
|
1919
|
+
enriched_X.rename(columns=reversed_renaming, inplace=True)
|
1920
|
+
for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
|
1921
|
+
eval_X_sampled.rename(columns=reversed_renaming, inplace=True)
|
1922
|
+
enriched_eval_X.rename(columns=reversed_renaming, inplace=True)
|
1923
|
+
|
1912
1924
|
datasets_hash = hash_input(self.X, self.y, self.eval_set)
|
1913
1925
|
return self.__cache_and_return_results(
|
1914
1926
|
datasets_hash,
|
@@ -3925,7 +3937,10 @@ if response.status_code == 200:
|
|
3925
3937
|
if feature_meta.name in original_names_dict.keys():
|
3926
3938
|
feature_meta.name = original_names_dict[feature_meta.name]
|
3927
3939
|
|
3928
|
-
|
3940
|
+
is_client_feature = feature_meta.name in df.columns
|
3941
|
+
|
3942
|
+
# Show and update shap values for client features only if select_features is True
|
3943
|
+
if updated_shaps is not None and (not is_client_feature or self.fit_select_features):
|
3929
3944
|
updating_shap = updated_shaps.get(feature_meta.name)
|
3930
3945
|
if updating_shap is None:
|
3931
3946
|
if feature_meta.shap_value != 0.0:
|
upgini/metrics.py
CHANGED
@@ -91,17 +91,8 @@ CATBOOST_MULTICLASS_PARAMS = {
|
|
91
91
|
|
92
92
|
LIGHTGBM_REGRESSION_PARAMS = {
|
93
93
|
"random_state": DEFAULT_RANDOM_STATE,
|
94
|
-
"min_gain_to_split": 0.001,
|
95
94
|
"n_estimators": 275,
|
96
|
-
"max_depth": 5,
|
97
|
-
"max_cat_threshold": 80,
|
98
|
-
"min_data_per_group": 25,
|
99
|
-
"cat_l2": 10,
|
100
|
-
"cat_smooth": 12,
|
101
|
-
"learning_rate": 0.05,
|
102
95
|
"feature_fraction": 1.0,
|
103
|
-
"min_sum_hessian_in_leaf": 0.01,
|
104
|
-
"objective": "huber",
|
105
96
|
"deterministic": "true",
|
106
97
|
"verbosity": -1,
|
107
98
|
}
|
@@ -412,7 +403,7 @@ class EstimatorWrapper:
|
|
412
403
|
self.logger.info(f"Convert bool feature {c} to int64")
|
413
404
|
x[c] = x[c].astype(np.int64)
|
414
405
|
self.converted_to_int.append(c)
|
415
|
-
elif not is_valid_numeric_array_data(x[c]):
|
406
|
+
elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
|
416
407
|
try:
|
417
408
|
x[c] = pd.to_numeric(x[c], errors="raise")
|
418
409
|
self.converted_to_numeric.append(c)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev13
|
3
|
+
Version: 1.2.81a3832.dev15
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=5fFTJYQ7XW7NIap7AMUPvGMnpxRIIy0tf2GwHB7rqbo,34
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=AGF2u2mbFL4KIdqZECiSkGuMhfluamJOveqhYnkKfQM,212614
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=nVt4zJKt7y1xD1ga9698QKlJQfXv93lARjUMC1E1_U4,43163
|
10
10
|
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.81a3832.
|
74
|
-
upgini-1.2.81a3832.
|
75
|
-
upgini-1.2.81a3832.
|
76
|
-
upgini-1.2.81a3832.
|
73
|
+
upgini-1.2.81a3832.dev15.dist-info/METADATA,sha256=FJEb884wXgIIYNfyxAfG8D1R2w0W-a2u0g4Mimn-1IM,49173
|
74
|
+
upgini-1.2.81a3832.dev15.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81a3832.dev15.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3832.dev15.dist-info/RECORD,,
|
File without changes
|
File without changes
|