upgini 1.2.81a3832.dev12__py3-none-any.whl → 1.2.81a3832.dev14__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.81a3832.dev12"
1
+ __version__ = "1.2.81a3832.dev14"
@@ -1053,8 +1053,9 @@ class FeaturesEnricher(TransformerMixin):
1053
1053
 
1054
1054
  # 1 If client features are presented - fit and predict with KFold estimator
1055
1055
  # on etalon features and calculate baseline metric
1056
- etalon_metric = None
1056
+ baseline_metric = None
1057
1057
  baseline_estimator = None
1058
+ updating_shaps = None
1058
1059
  custom_loss_add_params = get_additional_params_custom_loss(
1059
1060
  self.loss, model_task_type, logger=self.logger
1060
1061
  )
@@ -1074,17 +1075,18 @@ class FeaturesEnricher(TransformerMixin):
1074
1075
  text_features=text_features,
1075
1076
  has_date=has_date,
1076
1077
  )
1077
- etalon_cv_result = baseline_estimator.cross_val_predict(
1078
+ baseline_cv_result = baseline_estimator.cross_val_predict(
1078
1079
  fitting_X, y_sorted, baseline_score_column
1079
1080
  )
1080
- etalon_metric = etalon_cv_result.get_display_metric()
1081
- if etalon_metric is None:
1081
+ baseline_metric = baseline_cv_result.get_display_metric()
1082
+ if baseline_metric is None:
1082
1083
  self.logger.info(
1083
1084
  f"Baseline {metric} on train client features is None (maybe all features was removed)"
1084
1085
  )
1085
1086
  baseline_estimator = None
1086
1087
  else:
1087
- self.logger.info(f"Baseline {metric} on train client features: {etalon_metric}")
1088
+ self.logger.info(f"Baseline {metric} on train client features: {baseline_metric}")
1089
+ updating_shaps = baseline_cv_result.shap_values
1088
1090
 
1089
1091
  # 2 Fit and predict with KFold estimator on enriched tds
1090
1092
  # and calculate final metric (and uplift)
@@ -1110,10 +1112,7 @@ class FeaturesEnricher(TransformerMixin):
1110
1112
  )
1111
1113
  enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
1112
1114
  enriched_metric = enriched_cv_result.get_display_metric()
1113
- enriched_shaps = enriched_cv_result.shap_values
1114
-
1115
- if enriched_shaps is not None:
1116
- self._update_shap_values(trace_id, fitting_X, enriched_shaps, silent=not internal_call)
1115
+ updating_shaps = enriched_cv_result.shap_values
1117
1116
 
1118
1117
  if enriched_metric is None:
1119
1118
  self.logger.warning(
@@ -1122,8 +1121,8 @@ class FeaturesEnricher(TransformerMixin):
1122
1121
  enriched_estimator = None
1123
1122
  else:
1124
1123
  self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
1125
- if etalon_metric is not None and enriched_metric is not None:
1126
- uplift = (enriched_cv_result.metric - etalon_cv_result.metric) * multiplier
1124
+ if baseline_metric is not None and enriched_metric is not None:
1125
+ uplift = (enriched_cv_result.metric - baseline_cv_result.metric) * multiplier
1127
1126
 
1128
1127
  train_metrics = {
1129
1128
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
@@ -1141,8 +1140,10 @@ class FeaturesEnricher(TransformerMixin):
1141
1140
  np.mean(y_sorted),
1142
1141
  4,
1143
1142
  )
1144
- if etalon_metric is not None:
1145
- train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
1143
+ if baseline_metric is not None:
1144
+ train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
1145
+ baseline_metric
1146
+ )
1146
1147
  if enriched_metric is not None:
1147
1148
  train_metrics[self.bundle.get("quality_metrics_enriched_header").format(metric)] = (
1148
1149
  enriched_metric
@@ -1233,6 +1234,9 @@ class FeaturesEnricher(TransformerMixin):
1233
1234
 
1234
1235
  metrics.append(eval_metrics)
1235
1236
 
1237
+ if updating_shaps is not None:
1238
+ self._update_shap_values(trace_id, fitting_X, updating_shaps, silent=not internal_call)
1239
+
1236
1240
  metrics_df = pd.DataFrame(metrics)
1237
1241
  mean_target_hdr = self.bundle.get("quality_metrics_mean_target_header")
1238
1242
  if mean_target_hdr in metrics_df.columns:
@@ -1283,6 +1287,7 @@ class FeaturesEnricher(TransformerMixin):
1283
1287
 
1284
1288
  def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
1285
1289
  renaming = self.fit_columns_renaming or {}
1290
+ self.logger.info(f"Updating SHAP values: {new_shaps}")
1286
1291
  new_shaps = {
1287
1292
  renaming.get(feature, feature): _round_shap_value(shap)
1288
1293
  for feature, shap in new_shaps.items()
@@ -1909,6 +1914,13 @@ class FeaturesEnricher(TransformerMixin):
1909
1914
  enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
1910
1915
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
1911
1916
 
1917
+ reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
1918
+ X_sampled.rename(columns=reversed_renaming, inplace=True)
1919
+ enriched_X.rename(columns=reversed_renaming, inplace=True)
1920
+ for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
1921
+ eval_X_sampled.rename(columns=reversed_renaming, inplace=True)
1922
+ enriched_eval_X.rename(columns=reversed_renaming, inplace=True)
1923
+
1912
1924
  datasets_hash = hash_input(self.X, self.y, self.eval_set)
1913
1925
  return self.__cache_and_return_results(
1914
1926
  datasets_hash,
upgini/metrics.py CHANGED
@@ -412,7 +412,7 @@ class EstimatorWrapper:
412
412
  self.logger.info(f"Convert bool feature {c} to int64")
413
413
  x[c] = x[c].astype(np.int64)
414
414
  self.converted_to_int.append(c)
415
- elif not is_valid_numeric_array_data(x[c]):
415
+ elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
416
416
  try:
417
417
  x[c] = pd.to_numeric(x[c], errors="raise")
418
418
  self.converted_to_numeric.append(c)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.81a3832.dev12
3
+ Version: 1.2.81a3832.dev14
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=QoAMu0gkmwzsYvsLvBmcg4CfaE-sL6T-rz9s8HCGZY4,34
1
+ upgini/__about__.py,sha256=CxmaKb_KuIdKjpk8WSH1xoBLAKkBn_YYUK1Oy4B1YPo,34
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=cbQydnSOr7-ioQuEs-X3KYd0ays1BPuwFE_sKmOQc5E,211702
6
+ upgini/features_enricher.py,sha256=MvkF7QFEFJnFSgCmc9R9OAw0sEzNXJOP45KfCvT7LBg,212399
7
7
  upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=sbxnFyMWCUsVSAy-OwNmDYJxVlGEnTArVUnTOID7miU,43373
9
+ upgini/metrics.py,sha256=rmC3xCLOr1TF-nUiL7pHnECp137GZs1Qa7bNDsOJoLM,43404
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.81a3832.dev12.dist-info/METADATA,sha256=2cf3_AwHclmjPzAluKb_Y2I_4OecghsB-DqKoJVODls,49173
74
- upgini-1.2.81a3832.dev12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.81a3832.dev12.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.81a3832.dev12.dist-info/RECORD,,
73
+ upgini-1.2.81a3832.dev14.dist-info/METADATA,sha256=Oo42vaGU2xYm37PAI1EB1n10GT_w7o8l4O5p0dYWmQE,49173
74
+ upgini-1.2.81a3832.dev14.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.81a3832.dev14.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.81a3832.dev14.dist-info/RECORD,,