PyPI - upgini - Versions diffs - 1.1.267__py3-none-any.whl → 1.1.268__py3-none-any.whl - Mend

upgini 1.1.267__py3-none-any.whl → 1.1.268__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

upgini/features_enricher.py CHANGED Viewed

@@ -3665,7 +3665,7 @@ class FeaturesEnricher(TransformerMixin):
                     if y is not None:
                         with open(f"{tmp_dir}/y.pickle", "wb") as y_file:
                             pickle.dump(sample(y, xy_sample_index), y_file)
-                        if eval_set is not None:
+                        if eval_set:
                             eval_xy_sample_index = rnd.randint(0, _num_samples(eval_set[0][0]), size=1000)
                             with open(f"{tmp_dir}/eval_x.pickle", "wb") as eval_x_file:
                                 pickle.dump(sample(eval_set[0][0], eval_xy_sample_index), eval_x_file)

upgini/metrics.py CHANGED Viewed

@@ -3,15 +3,16 @@ import re
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import catboost
 import numpy as np
 import pandas as pd
 from catboost import CatBoostClassifier, CatBoostRegressor
-import catboost
 from lightgbm import LGBMClassifier, LGBMRegressor
 from numpy import log1p
 from pandas.api.types import is_numeric_dtype
 from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
+from upgini.utils.features_validator import FeaturesValidator
 from upgini.utils.sklearn_ext import cross_validate
 try:
@@ -352,6 +353,7 @@ class EstimatorWrapper:
             "target_type": target_type,
             "groups": groups,
             "text_features": text_features,
+            "logger": logger,
         }
         if estimator is None:
             params = dict()
@@ -414,12 +416,22 @@ class CatBoostWrapper(EstimatorWrapper):
         target_type: ModelTaskType,
         groups: Optional[List[str]] = None,
         text_features: Optional[List[str]] = None,
+        logger: Optional[logging.Logger] = None,
     ):
         super(CatBoostWrapper, self).__init__(
-            estimator, scorer, metric_name, multiplier, cv, target_type, groups=groups, text_features=text_features
+            estimator,
+            scorer,
+            metric_name,
+            multiplier,
+            cv,
+            target_type,
+            groups=groups,
+            text_features=text_features,
+            logger=logger,
         )
         self.cat_features = None
         self.emb_features = None
+        self.exclude_features = []
     def _prepare_to_fit(self, X: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, dict]:
         X, y, groups, params = super()._prepare_to_fit(X, y)
@@ -437,9 +449,7 @@ class CatBoostWrapper(EstimatorWrapper):
                 X, embedding_features = self.group_embeddings(X)
                 params["embedding_features"] = embedding_features
             else:
-                self.logger.info(
-                    f"Embedding features count less than 3, so use them separately: {self.emb_features}"
-                )
+                self.logger.info(f"Embedding features count less than 3, so use them separately: {self.emb_features}")
                 self.emb_features = []
         else:
             self.logger.warning(f"Embedding features are not supported by Catboost version {catboost.__version__}")
@@ -498,6 +508,8 @@ class CatBoostWrapper(EstimatorWrapper):
         return df, [emb_name]
     def _prepare_to_calculate(self, X: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
+        if self.exclude_features:
+            X = X.drop(columns=self.exclude_features)
         X, y, params = super()._prepare_to_calculate(X, y)
         if self.text_features:
             params["text_features"] = self.text_features
@@ -510,6 +522,26 @@ class CatBoostWrapper(EstimatorWrapper):
         return X, y, params
+    def cross_val_predict(
+        self, X: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
+    ) -> Optional[float]:
+        try:
+            return super().cross_val_predict(X, y, baseline_score_column)
+        except Exception as e:
+            if "Dictionary size is 0" in e.args[0] and self.text_features:
+                high_cardinality_features = FeaturesValidator.find_high_cardinality(X[self.text_features])
+                self.logger.warning(
+                    "Failed to calculate metrics. Try to remove high cardinality"
+                    f" text features {high_cardinality_features} and retry"
+                )
+                for f in high_cardinality_features:
+                    self.text_features.remove(f)
+                    self.exclude_features.append(f)
+                    X = X.drop(columns=f)
+                return super().cross_val_predict(X, y, baseline_score_column)
+            else:
+                raise e
 class LightGBMWrapper(EstimatorWrapper):
     def __init__(
@@ -522,9 +554,18 @@ class LightGBMWrapper(EstimatorWrapper):
         target_type: ModelTaskType,
         groups: Optional[List[str]] = None,
         text_features: Optional[List[str]] = None,
+        logger: Optional[logging.Logger] = None,
     ):
         super(LightGBMWrapper, self).__init__(
-            estimator, scorer, metric_name, multiplier, cv, target_type, groups=groups, text_features=text_features
+            estimator,
+            scorer,
+            metric_name,
+            multiplier,
+            cv,
+            target_type,
+            groups=groups,
+            text_features=text_features,
+            logger=logger,
         )
         self.cat_features = None
@@ -561,9 +602,18 @@ class OtherEstimatorWrapper(EstimatorWrapper):
         target_type: ModelTaskType,
         groups: Optional[List[str]] = None,
         text_features: Optional[List[str]] = None,
+        logger: Optional[logging.Logger] = None,
     ):
         super(OtherEstimatorWrapper, self).__init__(
-            estimator, scorer, metric_name, multiplier, cv, target_type, groups=groups, text_features=text_features
+            estimator,
+            scorer,
+            metric_name,
+            multiplier,
+            cv,
+            target_type,
+            groups=groups,
+            text_features=text_features,
+            logger=logger,
         )
         self.cat_features = None

upgini/utils/sklearn_ext.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import functools
-import logging
 import numbers
 import time
 import warnings
@@ -313,7 +312,7 @@ def cross_validate(
         return ret
     except Exception:
-        logging.exception("Failed to execute overriden cross_validate. Fallback to original")
+        # logging.exception("Failed to execute overriden cross_validate. Fallback to original")
         raise
         # fit_params["use_best_model"] = False
         # return original_cross_validate(

{upgini-1.1.267.dist-info → upgini-1.1.268.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: upgini
-Version: 1.1.267
+Version: 1.1.268
 Summary: Intelligent data search & enrichment for Machine Learning
 Home-page: https://upgini.com/
 Author: Upgini Developers

{upgini-1.1.267.dist-info → upgini-1.1.268.dist-info}/RECORD RENAMED Viewed

@@ -2,11 +2,11 @@ upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
 upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
 upgini/dataset.py,sha256=xb4gIANyGbdcuM8Awyq2pJPiH_3k_LEbETApJgAoRBA,45529
 upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
-upgini/features_enricher.py,sha256=poGGf5MZgangMFmfTxRWtE6FDPDy5VUtXLmW2tGiorI,174170
+upgini/features_enricher.py,sha256=1vHhSQBnFsq6IoYaG_oJbgEWqRZMpkt1rkoLOD-6nl4,174158
 upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
 upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
 upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
-upgini/metrics.py,sha256=3VvSZW1cCOIPHImXuqcnWzD3fWcpPzVa9k8eulLbUmY,27426
+upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
 upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
 upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
 upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -52,12 +52,12 @@ upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
 upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
 upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
 upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
-upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
+upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
 upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
 upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.1.267.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.1.267.dist-info/METADATA,sha256=TiFi7bLKF7TP0gGesfvnN_rs-2htvjOYQko0K4GKdDM,48156
-upgini-1.1.267.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-upgini-1.1.267.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
-upgini-1.1.267.dist-info/RECORD,,
+upgini-1.1.268.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.268.dist-info/METADATA,sha256=gTuBYet6-H97ppvX37qAJuC8tQCZ7bPetPl11O9XEFY,48156
+upgini-1.1.268.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+upgini-1.1.268.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+upgini-1.1.268.dist-info/RECORD,,

{upgini-1.1.267.dist-info → upgini-1.1.268.dist-info}/LICENSE RENAMED Viewed

File without changes

{upgini-1.1.267.dist-info → upgini-1.1.268.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.1.267.dist-info → upgini-1.1.268.dist-info}/top_level.txt RENAMED Viewed

File without changes

upgini 1.1.267__py3-none-any.whl → 1.1.268__py3-none-any.whl

upgini 1.1.267__py3-none-any.whl → 1.1.268__py3-none-any.whl