PyPI - upgini - Versions diffs - 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev15__py3-none-any.whl - Mend

upgini 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (7) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.81a3832.dev13"
1	+ __version__ = "1.2.81a3832.dev15"

upgini/features_enricher.py CHANGED Viewed

@@ -1053,8 +1053,9 @@ class FeaturesEnricher(TransformerMixin):
                     # 1 If client features are presented - fit and predict with KFold estimator
                     # on etalon features and calculate baseline metric
-                    etalon_metric = None
+                    baseline_metric = None
                     baseline_estimator = None
+                    updating_shaps = None
                     custom_loss_add_params = get_additional_params_custom_loss(
                         self.loss, model_task_type, logger=self.logger
                     )
@@ -1074,17 +1075,18 @@ class FeaturesEnricher(TransformerMixin):
                             text_features=text_features,
                             has_date=has_date,
                         )
-                        etalon_cv_result = baseline_estimator.cross_val_predict(
+                        baseline_cv_result = baseline_estimator.cross_val_predict(
                             fitting_X, y_sorted, baseline_score_column
                         )
-                        etalon_metric = etalon_cv_result.get_display_metric()
-                        if etalon_metric is None:
+                        baseline_metric = baseline_cv_result.get_display_metric()
+                        if baseline_metric is None:
                             self.logger.info(
                                 f"Baseline {metric} on train client features is None (maybe all features was removed)"
                             )
                             baseline_estimator = None
                         else:
-                            self.logger.info(f"Baseline {metric} on train client features: {etalon_metric}")
+                            self.logger.info(f"Baseline {metric} on train client features: {baseline_metric}")
+                        updating_shaps = baseline_cv_result.shap_values
                     # 2 Fit and predict with KFold estimator on enriched tds
                     # and calculate final metric (and uplift)
@@ -1110,10 +1112,7 @@ class FeaturesEnricher(TransformerMixin):
                         )
                         enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
                         enriched_metric = enriched_cv_result.get_display_metric()
-                        enriched_shaps = enriched_cv_result.shap_values
-                        if enriched_shaps is not None:
-                            self._update_shap_values(trace_id, fitting_X, enriched_shaps, silent=not internal_call)
+                        updating_shaps = enriched_cv_result.shap_values
                         if enriched_metric is None:
                             self.logger.warning(
@@ -1122,8 +1121,8 @@ class FeaturesEnricher(TransformerMixin):
                             enriched_estimator = None
                         else:
                             self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
-                        if etalon_metric is not None and enriched_metric is not None:
-                            uplift = (enriched_cv_result.metric - etalon_cv_result.metric) * multiplier
+                        if baseline_metric is not None and enriched_metric is not None:
+                            uplift = (enriched_cv_result.metric - baseline_cv_result.metric) * multiplier
                     train_metrics = {
                         self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
@@ -1141,8 +1140,10 @@ class FeaturesEnricher(TransformerMixin):
                             np.mean(y_sorted),
                             4,
                         )
-                    if etalon_metric is not None:
-                        train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
+                    if baseline_metric is not None:
+                        train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
+                            baseline_metric
+                        )
                     if enriched_metric is not None:
                         train_metrics[self.bundle.get("quality_metrics_enriched_header").format(metric)] = (
                             enriched_metric
@@ -1233,6 +1234,9 @@ class FeaturesEnricher(TransformerMixin):
                             metrics.append(eval_metrics)
+                    if updating_shaps is not None:
+                        self._update_shap_values(trace_id, fitting_X, updating_shaps, silent=not internal_call)
                     metrics_df = pd.DataFrame(metrics)
                     mean_target_hdr = self.bundle.get("quality_metrics_mean_target_header")
                     if mean_target_hdr in metrics_df.columns:
@@ -1283,6 +1287,7 @@ class FeaturesEnricher(TransformerMixin):
     def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
         renaming = self.fit_columns_renaming or {}
+        self.logger.info(f"Updating SHAP values: {new_shaps}")
         new_shaps = {
             renaming.get(feature, feature): _round_shap_value(shap)
             for feature, shap in new_shaps.items()
@@ -1909,6 +1914,13 @@ class FeaturesEnricher(TransformerMixin):
                 enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
                 eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
+        reversed_renaming = {v: k for k, v in self.fit_columns_renaming.items()}
+        X_sampled.rename(columns=reversed_renaming, inplace=True)
+        enriched_X.rename(columns=reversed_renaming, inplace=True)
+        for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
+            eval_X_sampled.rename(columns=reversed_renaming, inplace=True)
+            enriched_eval_X.rename(columns=reversed_renaming, inplace=True)
         datasets_hash = hash_input(self.X, self.y, self.eval_set)
         return self.__cache_and_return_results(
             datasets_hash,
@@ -3925,7 +3937,10 @@ if response.status_code == 200:
             if feature_meta.name in original_names_dict.keys():
                 feature_meta.name = original_names_dict[feature_meta.name]
-            if updated_shaps is not None:
+            is_client_feature = feature_meta.name in df.columns
+            # Show and update shap values for client features only if select_features is True
+            if updated_shaps is not None and (not is_client_feature or self.fit_select_features):
                 updating_shap = updated_shaps.get(feature_meta.name)
                 if updating_shap is None:
                     if feature_meta.shap_value != 0.0:

upgini/metrics.py CHANGED Viewed

@@ -91,17 +91,8 @@ CATBOOST_MULTICLASS_PARAMS = {
 LIGHTGBM_REGRESSION_PARAMS = {
     "random_state": DEFAULT_RANDOM_STATE,
-    "min_gain_to_split": 0.001,
     "n_estimators": 275,
-    "max_depth": 5,
-    "max_cat_threshold": 80,
-    "min_data_per_group": 25,
-    "cat_l2": 10,
-    "cat_smooth": 12,
-    "learning_rate": 0.05,
     "feature_fraction": 1.0,
-    "min_sum_hessian_in_leaf": 0.01,
-    "objective": "huber",
     "deterministic": "true",
     "verbosity": -1,
 }
@@ -412,7 +403,7 @@ class EstimatorWrapper:
                     self.logger.info(f"Convert bool feature {c} to int64")
                     x[c] = x[c].astype(np.int64)
                     self.converted_to_int.append(c)
-                elif not is_valid_numeric_array_data(x[c]):
+                elif not is_valid_numeric_array_data(x[c]) and not is_numeric_dtype(x[c]):
                     try:
                         x[c] = pd.to_numeric(x[c], errors="raise")
                         self.converted_to_numeric.append(c)

{upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.81a3832.dev13
+Version: 1.2.81a3832.dev15
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=CCFxP1VgWDg8FlCi3yTTGZEXFEnTDqGy6nl6_gT9xP0,34
+upgini/__about__.py,sha256=5fFTJYQ7XW7NIap7AMUPvGMnpxRIIy0tf2GwHB7rqbo,34
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=cbQydnSOr7-ioQuEs-X3KYd0ays1BPuwFE_sKmOQc5E,211702
+upgini/features_enricher.py,sha256=AGF2u2mbFL4KIdqZECiSkGuMhfluamJOveqhYnkKfQM,212614
 upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
 upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
-upgini/metrics.py,sha256=sbxnFyMWCUsVSAy-OwNmDYJxVlGEnTArVUnTOID7miU,43373
+upgini/metrics.py,sha256=nVt4zJKt7y1xD1ga9698QKlJQfXv93lARjUMC1E1_U4,43163
 upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.81a3832.dev13.dist-info/METADATA,sha256=OxsXwwF5632MzclasOWBdCpOTrJYw_V2-XREksLWkDY,49173
-upgini-1.2.81a3832.dev13.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.81a3832.dev13.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.81a3832.dev13.dist-info/RECORD,,
+upgini-1.2.81a3832.dev15.dist-info/METADATA,sha256=FJEb884wXgIIYNfyxAfG8D1R2w0W-a2u0g4Mimn-1IM,49173
+upgini-1.2.81a3832.dev15.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.81a3832.dev15.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.81a3832.dev15.dist-info/RECORD,,

{upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.81a3832.dev13.dist-info → upgini-1.2.81a3832.dev15.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev15__py3-none-any.whl

Potentially problematic release.

upgini 1.2.81a3832.dev13__py3-none-any.whl → 1.2.81a3832.dev15__py3-none-any.whl