PyPI - upgini - Versions diffs - 1.2.16a3654.dev3__py3-none-any.whl → 1.2.17__py3-none-any.whl - Mend

upgini 1.2.16a3654.dev3py3-none-any.whl → 1.2.17py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (9) hide show

upgini/__about__.py +1 -1
upgini/features_enricher.py +133 -39
upgini/metrics.py +68 -11
upgini/resource_bundle/strings.properties +1 -0
upgini/utils/display_utils.py +18 -5
{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/METADATA +2 -2
{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/RECORD +9 -9
{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/WHEEL +1 -1
{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.~~16a3654.dev3~~"
1	+ __version__ = "1.2.17"

upgini/features_enricher.py CHANGED Viewed

@@ -165,7 +165,6 @@ class FeaturesEnricher(TransformerMixin):
     RANDOM_STATE = 42
     CALCULATE_METRICS_THRESHOLD = 50_000_000
     CALCULATE_METRICS_MIN_THRESHOLD = 500
-    TEXT_FEATURES_THRESHOLD = 5_000
     GENERATE_FEATURES_LIMIT = 10
     EMPTY_FEATURES_INFO = pd.DataFrame(
         columns=[
@@ -337,6 +336,9 @@ class FeaturesEnricher(TransformerMixin):
         self.exclude_columns = exclude_columns
         self.baseline_score_column = baseline_score_column
         self.add_date_if_missing = add_date_if_missing
+        self.features_info_display_handle = None
+        self.data_sources_display_handle = None
+        self.report_button_handle = None
     def _get_api_key(self):
         return self._api_key
@@ -872,6 +874,13 @@ class FeaturesEnricher(TransformerMixin):
                 else None
             )
+            if self.X is None:
+                self.X = X
+            if self.y is None:
+                self.y = y
+            if self.eval_set is None:
+                self.eval_set = effective_eval_set
             try:
                 self.__log_debug_information(
                     validated_X,
@@ -939,16 +948,14 @@ class FeaturesEnricher(TransformerMixin):
                 gc.collect()
-                text_features = self.generate_features if fitting_X.shape[0] >= self.TEXT_FEATURES_THRESHOLD else []
+                if fitting_X.shape[1] == 0 and fitting_enriched_X.shape[1] == 0:
+                    print(self.bundle.get("metrics_no_important_free_features"))
+                    self.logger.warning("No client or free relevant ADS features found to calculate metrics")
+                    self.warning_counter.increment()
+                    return None
                 print(self.bundle.get("metrics_start"))
                 with Spinner():
-                    if fitting_X.shape[1] == 0 and fitting_enriched_X.shape[1] == 0:
-                        print(self.bundle.get("metrics_no_important_free_features"))
-                        self.logger.warning("No client or free relevant ADS features found to calculate metrics")
-                        self.warning_counter.increment()
-                        return None
                     self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
                     has_date = SearchKey.find_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME]) is not None
@@ -962,7 +969,7 @@ class FeaturesEnricher(TransformerMixin):
                         fitting_enriched_X,
                         scoring,
                         groups=groups,
-                        text_features=text_features,
+                        text_features=self.generate_features,
                         has_date=has_date,
                     )
                     metric = wrapper.metric_name
@@ -989,10 +996,10 @@ class FeaturesEnricher(TransformerMixin):
                             cat_features,
                             add_params=custom_loss_add_params,
                             groups=groups,
-                            text_features=text_features,
+                            text_features=self.generate_features,
                             has_date=has_date,
                         )
-                        etalon_metric = baseline_estimator.cross_val_predict(
+                        etalon_metric, _ = baseline_estimator.cross_val_predict(
                             fitting_X, y_sorted, self.baseline_score_column
                         )
                         if etalon_metric is None:
@@ -1023,10 +1030,16 @@ class FeaturesEnricher(TransformerMixin):
                             cat_features,
                             add_params=custom_loss_add_params,
                             groups=groups,
-                            text_features=text_features,
+                            text_features=self.generate_features,
                             has_date=has_date,
                         )
-                        enriched_metric = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
+                        enriched_metric, enriched_shaps = enriched_estimator.cross_val_predict(
+                            fitting_enriched_X, enriched_y_sorted
+                        )
+                        if enriched_shaps is not None:
+                            self._update_shap_values(enriched_shaps)
                         if enriched_metric is None:
                             self.logger.warning(
                                 f"Enriched {metric} on train combined features is None (maybe all features was removed)"
@@ -1159,13 +1172,6 @@ class FeaturesEnricher(TransformerMixin):
                     elif uplift_col in metrics_df.columns and (metrics_df[uplift_col] < 0).any():
                         self.logger.warning("Uplift is negative")
-                    if self.X is None:
-                        self.X = X
-                    if self.y is None:
-                        self.y = y
-                    if self.eval_set is None:
-                        self.eval_set = effective_eval_set
                     return metrics_df
             except Exception as e:
                 error_message = "Failed to calculate metrics" + (
@@ -1190,6 +1196,72 @@ class FeaturesEnricher(TransformerMixin):
             finally:
                 self.logger.info(f"Calculating metrics elapsed time: {time.time() - start_time}")
+    def _update_shap_values(self, new_shaps: Dict[str, float]):
+        """Replace the stored SHAP values with recalculated ones and refresh the outputs.
+
+        Only features already listed in ``self.feature_names_`` are taken from
+        ``new_shaps``; each value is rounded with ``_round_shap_value``. The method
+        then rewrites ``feature_names_`` / ``feature_importances_``, updates the SHAP
+        column of the three features-info frames, regroups the relevant data sources
+        and re-renders every notebook output whose display handle was saved.
+
+        Args:
+            new_shaps: mapping of feature name to its recalculated SHAP value.
+        """
+        new_shaps = {
+            feature: self._round_shap_value(shap)
+            for feature, shap in new_shaps.items()
+            if feature in self.feature_names_
+        }
+        # Nothing matched the known features: keep the current values. Unpacking
+        # ``zip(*[])`` into two names would raise ValueError here.
+        if not new_shaps:
+            self.logger.warning("No known features in recalculated SHAP values, skip update")
+            return
+        # Highest SHAP first; ties are ordered by feature name.
+        features_importances = sorted(new_shaps.items(), key=lambda m: (-m[1], m[0]))
+        self.feature_names_ = [name for name, _ in features_importances]
+        self.feature_importances_ = [importance for _, importance in features_importances]
+        feature_name_header = self.bundle.get("features_info_name")
+        shap_value_header = self.bundle.get("features_info_shap")
+        def update_shap(row):
+            # Rows whose name has no recalculated value keep their current SHAP.
+            return new_shaps.get(row[feature_name_header], row[shap_value_header])
+        self.features_info[shap_value_header] = self.features_info.apply(update_shap, axis=1)
+        self._internal_features_info[shap_value_header] = self._internal_features_info.apply(update_shap, axis=1)
+        self._features_info_without_links[shap_value_header] = self._features_info_without_links.apply(
+            update_shap, axis=1
+        )
+        self.logger.info(f"Recalculated SHAP values:\n{self._features_info_without_links}")
+        self.features_info.sort_values(by=shap_value_header, ascending=False, inplace=True)
+        self._internal_features_info.sort_values(by=shap_value_header, ascending=False, inplace=True)
+        self._features_info_without_links.sort_values(by=shap_value_header, ascending=False, inplace=True)
+        self.relevant_data_sources = self._group_relevant_data_sources(self.features_info, self.bundle)
+        self._relevant_data_sources_wo_links = self._group_relevant_data_sources(
+            self._features_info_without_links, self.bundle
+        )
+        if self.features_info_display_handle is not None:
+            try:
+                # get_ipython is only defined inside IPython; NameError selects the plain-text fallback.
+                _ = get_ipython()  # type: ignore
+                display_html_dataframe(
+                    self.features_info,
+                    self._features_info_without_links,
+                    self.bundle.get("relevant_features_header"),
+                    display_handle=self.features_info_display_handle,
+                )
+            except (ImportError, NameError):
+                print(self._internal_features_info)
+        if self.data_sources_display_handle is not None:
+            try:
+                _ = get_ipython()  # type: ignore
+                display_html_dataframe(
+                    self.relevant_data_sources,
+                    self._relevant_data_sources_wo_links,
+                    # Same header key as the initial render of this table, so the
+                    # refreshed output keeps its "data sources" title.
+                    self.bundle.get("relevant_data_sources_header"),
+                    display_handle=self.data_sources_display_handle,
+                )
+            except (ImportError, NameError):
+                print(self._relevant_data_sources_wo_links)
+        if self.report_button_handle is not None:
+            try:
+                _ = get_ipython()  # type: ignore
+                self.__show_report_button(display_handle=self.report_button_handle)
+            except (ImportError, NameError):
+                # Outside IPython there is no report button to refresh.
+                pass
     def _check_train_and_eval_target_distribution(self, y, eval_set_dict):
         uneven_distribution = False
         for eval_set in eval_set_dict.values():
@@ -1518,11 +1590,19 @@ class FeaturesEnricher(TransformerMixin):
             self.logger.info("No external features selected. So use only input datasets for metrics calculation")
             return self.__sample_only_input(validated_X, validated_y, eval_set, is_demo_dataset)
         # TODO save and check if dataset was deduplicated - use imbalance branch for such case
-        elif not self.imbalanced and not exclude_features_sources and is_input_same_as_fit:
+        elif (
+            not self.imbalanced
+            and not exclude_features_sources
+            and is_input_same_as_fit
+            and self.df_with_original_index is not None
+        ):
             self.logger.info("Dataset is not imbalanced, so use enriched_X from fit")
             return self.__sample_balanced(eval_set, trace_id, remove_outliers_calc_metrics)
         else:
-            self.logger.info("Dataset is imbalanced or exclude_features_sources or X was passed. Run transform")
+            self.logger.info(
+                "Dataset is imbalanced or exclude_features_sources or X was passed or this is saved search."
+                " Run transform"
+            )
             print(self.bundle.get("prepare_data_for_metrics"))
             return self.__sample_imbalanced(
                 validated_X,
@@ -2028,6 +2108,13 @@ class FeaturesEnricher(TransformerMixin):
             runtime_parameters = self._get_copy_of_runtime_parameters()
             features_for_transform = self._search_task.get_features_for_transform() or []
             if len(features_for_transform) > 0:
+                missing_features_for_transform = [
+                    columns_renaming.get(f) for f in features_for_transform if f not in df.columns
+                ]
+                if len(missing_features_for_transform) > 0:
+                    raise ValidationError(
+                        self.bundle.get("missing_features_for_transform").format(missing_features_for_transform)
+                    )
                 runtime_parameters.properties["features_for_embeddings"] = ",".join(features_for_transform)
             columns_for_system_record_id = sorted(list(search_keys.keys()) + features_for_transform)
@@ -2702,10 +2789,10 @@ class FeaturesEnricher(TransformerMixin):
                         progress_callback,
                     )
                 except Exception:
-                    self.__show_report_button()
+                    self.report_button_handle = self.__show_report_button(display_id="report_button")
                     raise
-        self.__show_report_button()
+        self.report_button_handle = self.__show_report_button(display_id="report_button")
         if not self.warning_counter.has_warnings():
             self.__display_support_link(self.bundle.get("all_ok_community_invite"))
@@ -3377,6 +3464,13 @@ class FeaturesEnricher(TransformerMixin):
         return result_train, result_eval_sets
+    @staticmethod
+    def _round_shap_value(shap: float) -> float:
+        # Round a SHAP value to 4 decimal places, but lift any positive value
+        # below 0.0001 up to 0.0001 so that it does not round down to 0.0.
+        if shap > 0.0 and shap < 0.0001:
+            return 0.0001
+        else:
+            return round(shap, 4)
     def __prepare_feature_importances(self, trace_id: str, x_columns: List[str], silent=False):
         llm_source = "LLM with external data augmentation"
         if self._search_task is None:
@@ -3394,12 +3488,6 @@ class FeaturesEnricher(TransformerMixin):
         features_info_without_links = []
         internal_features_info = []
-        def round_shap_value(shap: float) -> float:
-            if shap > 0.0 and shap < 0.0001:
-                return 0.0001
-            else:
-                return round(shap, 4)
         def list_or_single(lst: List[str], single: str):
             return lst or ([single] if single else [])
@@ -3432,7 +3520,7 @@ class FeaturesEnricher(TransformerMixin):
             feature_sample = []
             self.feature_names_.append(feature_meta.name)
-            self.feature_importances_.append(round_shap_value(feature_meta.shap_value))
+            self.feature_importances_.append(self._round_shap_value(feature_meta.shap_value))
             if feature_meta.name in features_df.columns:
                 feature_sample = np.random.choice(features_df[feature_meta.name].dropna().unique(), 3).tolist()
                 if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
@@ -3471,7 +3559,7 @@ class FeaturesEnricher(TransformerMixin):
             features_info.append(
                 {
                     self.bundle.get("features_info_name"): feature_name,
-                    self.bundle.get("features_info_shap"): round_shap_value(feature_meta.shap_value),
+                    self.bundle.get("features_info_shap"): self._round_shap_value(feature_meta.shap_value),
                     self.bundle.get("features_info_hitrate"): feature_meta.hit_rate,
                     self.bundle.get("features_info_value_preview"): feature_sample,
                     self.bundle.get("features_info_provider"): provider,
@@ -3482,7 +3570,7 @@ class FeaturesEnricher(TransformerMixin):
             features_info_without_links.append(
                 {
                     self.bundle.get("features_info_name"): internal_feature_name,
-                    self.bundle.get("features_info_shap"): round_shap_value(feature_meta.shap_value),
+                    self.bundle.get("features_info_shap"): self._round_shap_value(feature_meta.shap_value),
                     self.bundle.get("features_info_hitrate"): feature_meta.hit_rate,
                     self.bundle.get("features_info_value_preview"): feature_sample,
                     self.bundle.get("features_info_provider"): internal_provider,
@@ -3494,7 +3582,7 @@ class FeaturesEnricher(TransformerMixin):
                 {
                     self.bundle.get("features_info_name"): internal_feature_name,
                     "feature_link": feature_meta.doc_link,
-                    self.bundle.get("features_info_shap"): round_shap_value(feature_meta.shap_value),
+                    self.bundle.get("features_info_shap"): self._round_shap_value(feature_meta.shap_value),
                     self.bundle.get("features_info_hitrate"): feature_meta.hit_rate,
                     self.bundle.get("features_info_value_preview"): feature_sample,
                     self.bundle.get("features_info_provider"): internal_provider,
@@ -3774,14 +3862,18 @@ class FeaturesEnricher(TransformerMixin):
             print(Format.GREEN + Format.BOLD + msg + Format.END)
             self.logger.info(msg)
             if len(self.feature_names_) > 0:
-                display_html_dataframe(
-                    self.features_info, self._features_info_without_links, self.bundle.get("relevant_features_header")
+                self.features_info_display_handle = display_html_dataframe(
+                    self.features_info,
+                    self._features_info_without_links,
+                    self.bundle.get("relevant_features_header"),
+                    display_id="features_info",
                 )
-                display_html_dataframe(
+                self.data_sources_display_handle = display_html_dataframe(
                     self.relevant_data_sources,
                     self._relevant_data_sources_wo_links,
                     self.bundle.get("relevant_data_sources_header"),
+                    display_id="data_sources",
                 )
             else:
                 msg = self.bundle.get("features_info_zero_important_features")
@@ -3792,9 +3884,9 @@ class FeaturesEnricher(TransformerMixin):
             print(msg)
             print(self._internal_features_info)
-    def __show_report_button(self):
+    def __show_report_button(self, display_id: Optional[str] = None, display_handle=None):
         try:
-            prepare_and_show_report(
+            return prepare_and_show_report(
                 relevant_features_df=self._features_info_without_links,
                 relevant_datasources_df=self.relevant_data_sources,
                 metrics_df=self.metrics,
@@ -3802,6 +3894,8 @@ class FeaturesEnricher(TransformerMixin):
                 search_id=self._search_task.search_task_id,
                 email=self.rest_client.get_current_email(),
                 search_keys=[str(sk) for sk in self.search_keys.values()],
+                display_id=display_id,
+                display_handle=display_handle,
             )
         except Exception:
             pass

upgini/metrics.py CHANGED Viewed

@@ -3,13 +3,14 @@ from __future__ import annotations
 import inspect
 import logging
 import re
+from collections import defaultdict
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import catboost
 import numpy as np
 import pandas as pd
-from catboost import CatBoostClassifier, CatBoostRegressor
+from catboost import CatBoost, CatBoostClassifier, CatBoostRegressor, Pool
 from numpy import log1p
 from pandas.api.types import is_numeric_dtype
 from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
@@ -63,7 +64,7 @@ CATBOOST_BINARY_PARAMS = {
     "verbose": False,
     "random_state": DEFAULT_RANDOM_STATE,
     "allow_writing_files": False,
-    # "auto_class_weights": "SqrtBalanced",
+    "auto_class_weights": "Balanced",
 }
 CATBOOST_MULTICLASS_PARAMS = {
@@ -81,7 +82,7 @@ CATBOOST_MULTICLASS_PARAMS = {
     "verbose": False,
     "random_state": DEFAULT_RANDOM_STATE,
     "allow_writing_files": False,
-    "auto_class_weights": "SqrtBalanced",
+    "auto_class_weights": "Balanced",
 }
 LIGHTGBM_PARAMS = {
@@ -288,9 +289,12 @@ class EstimatorWrapper:
         x, y, _ = self._prepare_data(x, y)
         return x, y, {}
+    def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
+        # Default hook: this base implementation computes nothing and returns None.
+        # CatBoostWrapper overrides it; cross_val_predict skips folds that return None.
+        return None
     def cross_val_predict(
         self, x: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
-    ) -> Optional[float]:
+    ) -> Tuple[Optional[float], Optional[Dict[str, float]]]:
         x, y, groups, fit_params = self._prepare_to_fit(x, y)
         if x.shape[1] == 0:
@@ -298,6 +302,7 @@ class EstimatorWrapper:
         scorer = check_scoring(self.estimator, scoring=self.scorer)
+        shap_values_all_folds = defaultdict(list)
         if baseline_score_column is not None and self.metric_name == "GINI":
             self.logger.info("Calculate baseline GINI on passed baseline_score_column and target")
             metric = roc_auc_score(y, x[baseline_score_column])
@@ -319,7 +324,29 @@ class EstimatorWrapper:
             self.check_fold_metrics(metrics_by_fold)
             metric = np.mean(metrics_by_fold) * self.multiplier
-        return self.post_process_metric(metric)
+            splits = self.cv.split(x, y, groups)
+            for estimator, split in zip(self.cv_estimators, splits):
+                _, validation_idx = split
+                cv_x = x.iloc[validation_idx]
+                cv_y = y[validation_idx]
+                shaps = self.calculate_shap(cv_x, cv_y, estimator)
+                if shaps is not None:
+                    for feature, shap_value in shaps.items():
+                        # shap_values_all_folds[feature] = shap_values_all_folds.get(feature, []) + shap_value.tolist()
+                        shap_values_all_folds[feature].extend(shap_value.tolist())
+        if shap_values_all_folds:
+            average_shap_values = {
+                feature: np.mean(np.array(shaps)) for feature, shaps in shap_values_all_folds.items() if len(shaps) > 0
+            }
+            if len(average_shap_values) == 0:
+                average_shap_values = None
+        else:
+            average_shap_values = None
+        return self.post_process_metric(metric), average_shap_values
     def check_fold_metrics(self, metrics_by_fold: List[float]):
         first_metric_sign = 1 if metrics_by_fold[0] >= 0 else -1
@@ -453,6 +480,7 @@ class CatBoostWrapper(EstimatorWrapper):
         )
         self.cat_features = None
         self.emb_features = None
+        self.grouped_embedding_features = None
         self.exclude_features = []
     def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, dict]:
@@ -462,17 +490,16 @@ class CatBoostWrapper(EstimatorWrapper):
         if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
             emb_pattern = r"(.+)_emb\d+"
             self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
-            embedding_features = []
             if len(self.emb_features) > 3:  # There is no reason to reduce embeddings dimension with less than 4
                 self.logger.info(
                     "Embedding features count more than 3, so group them into one vector for CatBoost: "
                     f"{self.emb_features}"
                 )
-                x, embedding_features = self.group_embeddings(x)
-                params["embedding_features"] = embedding_features
+                x, self.grouped_embedding_features = self.group_embeddings(x)
+                params["embedding_features"] = self.grouped_embedding_features
             else:
                 self.logger.info(f"Embedding features count less than 3, so use them separately: {self.emb_features}")
-                self.emb_features = []
+                self.grouped_embedding_features = None
         else:
             self.logger.warning(f"Embedding features are not supported by Catboost version {catboost.__version__}")
@@ -488,7 +515,7 @@ class CatBoostWrapper(EstimatorWrapper):
             self.logger.warning(f"Text features are not supported by this Catboost version {catboost.__version__}")
         # Find rest categorical features
-        self.cat_features = _get_cat_features(x, self.text_features, embedding_features)
+        self.cat_features = _get_cat_features(x, self.text_features, self.grouped_embedding_features)
         # x = fill_na_cat_features(x, self.cat_features)
         unique_cat_features = []
         for name in self.cat_features:
@@ -548,7 +575,7 @@ class CatBoostWrapper(EstimatorWrapper):
     def cross_val_predict(
         self, x: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
-    ) -> Optional[float]:
+    ) -> Tuple[Optional[float], Optional[Dict[str, float]]]:
         try:
             return super().cross_val_predict(x, y, baseline_score_column)
         except Exception as e:
@@ -573,6 +600,36 @@ class CatBoostWrapper(EstimatorWrapper):
             else:
                 raise e
+    def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator: CatBoost) -> Optional[Dict[str, float]]:
+        # Compute absolute SHAP values of `estimator` on the fold data (x, y).
+        # Returns a mapping from estimator.feature_names_ to per-feature values, or
+        # None if anything fails (the exception is logged, not raised).
+        # NOTE(review): the mapped values are numpy arrays rather than floats (the
+        # caller in cross_val_predict calls .tolist() on them), so the
+        # Dict[str, float] annotation looks too narrow - confirm.
+        try:
+            # Create Pool for fold data, if need (for example, when categorical features are present)
+            fold_pool = Pool(
+                x,
+                y,
+                cat_features=self.cat_features,
+                text_features=self.text_features,
+                embedding_features=self.grouped_embedding_features,
+            )
+            # Get SHAP values of current estimator
+            shap_values_fold = estimator.get_feature_importance(data=fold_pool, type="ShapValues")
+            # Remove last columns (base value) and flatten
+            # NOTE(review): assumes the multiclass result is laid out as
+            # (objects, classes, features + 1) and the other tasks as
+            # (objects, features + 1) - TODO confirm against the CatBoost docs.
+            if self.target_type == ModelTaskType.MULTICLASS:
+                all_shaps = shap_values_fold[:, :, :-1]
+                all_shaps = [all_shaps[:, :, k].flatten() for k in range(all_shaps.shape[2])]
+            else:
+                all_shaps = shap_values_fold[:, :-1]
+                all_shaps = [all_shaps[:, k].flatten() for k in range(all_shaps.shape[1])]
+            # Sign is discarded: only the magnitude of each contribution is kept.
+            all_shaps = np.abs(all_shaps)
+            return dict(zip(estimator.feature_names_, all_shaps))
+        except Exception:
+            self.logger.exception("Failed to recalculate new SHAP values")
+            return None
 class LightGBMWrapper(EstimatorWrapper):
     def __init__(

upgini/resource_bundle/strings.properties CHANGED Viewed

@@ -136,6 +136,7 @@ eval_y_is_empty=y in eval_set is empty.
 x_and_eval_x_diff_types=X and eval_set X has different types: {} and {}
 baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input dataframe
 baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
+missing_features_for_transform=Missing some features for transform that were presented on fit: {}
     # target validation
 empty_target=Target is empty in all rows
 # non_numeric_target=Binary target should be numerical type

upgini/utils/display_utils.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import Callable, List, Optional
 import pandas as pd
 from xhtml2pdf import pisa
 from upgini.__about__ import __version__
@@ -72,7 +73,9 @@ def make_table(df: pd.DataFrame, wrap_long_string=None) -> str:
     )
-def display_html_dataframe(df: pd.DataFrame, internal_df: pd.DataFrame, header: str):
+def display_html_dataframe(
+    df: pd.DataFrame, internal_df: pd.DataFrame, header: str, display_id: Optional[str] = None, display_handle=None
+):
     if not ipython_available():
         print(header)
         print(internal_df)
@@ -133,7 +136,10 @@ def display_html_dataframe(df: pd.DataFrame, internal_df: pd.DataFrame, header:
             {table_html}
         </div>
         """
-    display(HTML(result_html))
+    if display_handle:
+        return display_handle.update(HTML(result_html))
+    else:
+        return display(HTML(result_html), display_id=display_id)
 def make_html_report(
@@ -279,6 +285,8 @@ def prepare_and_show_report(
     search_id: str,
     email: Optional[str],
     search_keys: Optional[List[str]] = None,
+    display_id: Optional[str] = None,
+    display_handle=None,
 ):
     if not ipython_available():
         return
@@ -288,10 +296,12 @@ def prepare_and_show_report(
     )
     if len(relevant_features_df) > 0:
-        show_button_download_pdf(report)
+        return show_button_download_pdf(report, display_id=display_id, display_handle=display_handle)
-def show_button_download_pdf(source: str, title="\U0001F4CA Download PDF report"):
+def show_button_download_pdf(
+    source: str, title="\U0001F4CA Download PDF report", display_id: Optional[str] = None, display_handle=None
+):
     from IPython.display import HTML, display
     file_name = f"upgini-report-{uuid.uuid4()}.pdf"
@@ -303,7 +313,10 @@ def show_button_download_pdf(source: str, title="\U0001F4CA Download PDF report"
         payload = b64.decode()
         html = f"""<a download="{file_name}" href="data:application/pdf;base64,{payload}" target="_blank">
         <button>{title}</button></a>"""
-        display(HTML(html))
+        if display_handle is not None:
+            display_handle.update(HTML(html))
+        else:
+            return display(HTML(html), display_id=display_id)
 def show_request_quote_button():

{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.16a3654.dev3
+Version: 1.2.17
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -145,7 +145,7 @@ Description-Content-Type: text/markdown
 ## 💼 Tutorials
-###  [Search of relevant external features & Automated feature generation for Salary predicton task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
+###  [Search of relevant external features & Automated feature generation for Salary prediction task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
 * The goal is to predict salary for data science job postning based on information about employer and job description.
 * Following this guide, you'll learn how to **search & auto generate new relevant features with Upgini library**

{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
-upgini/__about__.py,sha256=_Bu0M4jZFah8h2SMG8XdLYshfHKLNdK08pZuRYLA3_4,33
+upgini/__about__.py,sha256=LyCJKEtzC7sS6MlxViknrdz9t79ni5iIOEGUNPPAnwU,23
 upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=4lfofrRPndG_CFMownDHZuXTnfMgDF1a8hW-ShdU8ns,188446
+upgini/features_enricher.py,sha256=3Jx6eoGULag64lN8pnwloI-RKwyLlVONrCADxpehwNo,192789
 upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
 upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
 upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
-upgini/metrics.py,sha256=CVE-hshFDrONIruPa-ZJV874ykpgs5oj8FtDduSWGm4,31264
+upgini/metrics.py,sha256=lhLqFv1tLWNzx3ULELo3MMSqI8eBoHL7P5jKpG8a6PE,33899
 upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1389
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 upgini/normalizer/normalize_utils.py,sha256=Lv75lq7M46z9cAIutwkdKZtPZkWblgoRzToAJ1BwY8A,7709
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=eqJP6bGu12zFuQJqMY03QbMhppcdwIfL2bsJWaqmuZ4,27221
+upgini/resource_bundle/strings.properties,sha256=bWWznzu43Lwfd-j4XDrpKJCpoxMMThd73awB7ge7wfo,27319
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -44,7 +44,7 @@ upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDc
 upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
 upgini/utils/datetime_utils.py,sha256=4tsGeehU0KS6wqNsc9gEEWZ9s6T9E0UReUIO3rSuXNU,12174
 upgini/utils/deduplicate_utils.py,sha256=NpaPtBYXwUtfKTRHWrtz2uUq6tZN6C_Nd719ydPRF2Q,8484
-upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
+upgini/utils/display_utils.py,sha256=NGhki1aGMsS8OeI69eLXEpmS_s41k8ojKHQxacJaXiU,11493
 upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
 upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
 upgini/utils/features_validator.py,sha256=yiOdzVtpArELMufzAa9mtWq32lETB6sIF-w3Yvl3vV8,3614
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
 upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.2.16a3654.dev3.dist-info/METADATA,sha256=TwuZaQV-HlLl1aijL54CogvxQjndKzdkNBaXbc0jaZA,48587
-upgini-1.2.16a3654.dev3.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
-upgini-1.2.16a3654.dev3.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.16a3654.dev3.dist-info/RECORD,,
+upgini-1.2.17.dist-info/METADATA,sha256=g8R9yIZmDZNOFNFMVW-65PTooKnQx6tWMX4Z1Pky-yI,48578
+upgini-1.2.17.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.17.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.17.dist-info/RECORD,,

{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.24.2
+Generator: hatchling 1.25.0
 Root-Is-Purelib: true
 Tag: py3-none-any

{upgini-1.2.16a3654.dev3.dist-info → upgini-1.2.17.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.16a3654.dev3__py3-none-any.whl → 1.2.17__py3-none-any.whl

Potentially problematic release.

upgini 1.2.16a3654.dev3py3-none-any.whl → 1.2.17py3-none-any.whl