PyPI - upgini - Versions diffs - 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl - Mend

upgini 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (7) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.81a3832.dev6"
1	+ __version__ = "1.2.81a3832.dev8"

upgini/features_enricher.py CHANGED Viewed

@@ -3934,6 +3934,7 @@ if response.status_code == 200:
                 continue
             # Use only important features
+            # If select_features is False, we don't show etalon features in the report
             if (
                 # feature_meta.name in self.fit_generated_features or
                 feature_meta.name == COUNTRY  # constant synthetic column

upgini/metrics.py CHANGED Viewed

@@ -18,6 +18,7 @@ from numpy import log1p
 from pandas.api.types import is_numeric_dtype
 from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
+from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
 from upgini.utils.features_validator import FeaturesValidator
 from upgini.utils.sklearn_ext import cross_validate
@@ -31,7 +32,7 @@ except ImportError:
     available_scorers = SCORERS
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
-from sklearn.model_selection import BaseCrossValidator  # , TimeSeriesSplit
+from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit  # , TimeSeriesSplit
 from upgini.errors import ValidationError
 from upgini.metadata import ModelTaskType
@@ -250,6 +251,8 @@ class _CrossValResults:
 class EstimatorWrapper:
+    default_estimator = "catboost"
     def __init__(
         self,
         estimator,
@@ -352,6 +355,7 @@ class EstimatorWrapper:
             self.logger.info("Calculate baseline GINI on passed baseline_score_column and target")
             metric = roc_auc_score(y, x[baseline_score_column])
         else:
+            self.logger.info(f"Cross validate with estimeator: {self.estimator}")
             cv_results = cross_validate(
                 estimator=self.estimator,
                 x=x,
@@ -458,31 +462,43 @@ class EstimatorWrapper:
             "logger": logger,
         }
         if estimator is None:
-            params = {"has_time": has_date}
-            if target_type == ModelTaskType.MULTICLASS:
-                params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
-                params = _get_add_params(params, add_params)
-                estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
-                # params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
-                # params = _get_add_params(params, add_params)
-                # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
-            elif target_type == ModelTaskType.BINARY:
-                params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
-                params = _get_add_params(params, add_params)
-                estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
-                # params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
-                # params = _get_add_params(params, add_params)
-                # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
-            elif target_type == ModelTaskType.REGRESSION:
-                params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
-                params = _get_add_params(params, add_params)
-                estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
-                # if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
-                #     params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
-                # params = _get_add_params(params, add_params)
-                # estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
+            if EstimatorWrapper.default_estimator == "catboost":
+                logger.info("Using CatBoost as default estimator")
+                params = {"has_time": has_date}
+                if target_type == ModelTaskType.MULTICLASS:
+                    params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
+                elif target_type == ModelTaskType.BINARY:
+                    params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
+                elif target_type == ModelTaskType.REGRESSION:
+                    params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
+                else:
+                    raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
+            elif EstimatorWrapper.default_estimator == "lightgbm":
+                logger.info("Using LightGBM as default estimator")
+                params = {"random_state": DEFAULT_RANDOM_STATE, "verbose": -1}
+                if target_type == ModelTaskType.MULTICLASS:
+                    params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
+                elif target_type == ModelTaskType.BINARY:
+                    params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
+                elif target_type == ModelTaskType.REGRESSION:
+                    if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
+                        params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
+                    params = _get_add_params(params, add_params)
+                    estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
+                else:
+                    raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
             else:
-                raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
+                raise Exception("Unsupported default_estimator. Available: catboost, lightgbm")
         else:
             if hasattr(estimator, "copy"):
                 estimator_copy = estimator.copy()
@@ -490,8 +506,8 @@ class EstimatorWrapper:
                 estimator_copy = deepcopy(estimator)
             kwargs["estimator"] = estimator_copy
             if is_catboost_estimator(estimator):
-                if cat_features is not None:
-                    estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
+                if has_date is not None:
+                    estimator_copy.set_params(has_time=has_date)
                 estimator = CatBoostWrapper(**kwargs)
             else:
                 if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
@@ -941,8 +957,8 @@ def _get_cat_features(
     logger.info(f"Selected categorical features: {cat_features}")
-    non_encode_features = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
-    features_to_encode = [f for f in cat_features if f not in non_encode_features]
+    features_to_encode = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
+    features_to_encode = [f for f in cat_features if f in features_to_encode]
     logger.info(f"Features to encode: {features_to_encode}")

{upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.81a3832.dev6
+Version: 1.2.81a3832.dev8
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=yNrgPKOedmyNgT4TYavHML3irFQc9hNEAf0TxhtzLzA,33
+upgini/__about__.py,sha256=M1jXitaZAXPLIGnBLF3YC2-DONuCmeKDqyDxngbrHI0,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=ODCSzFw62y_8vUrfbcZtDu0dWMIDCGYKWD2F54QDFII,210787
+upgini/features_enricher.py,sha256=WCX50iuq8_hf9AYuEfs_ZWNR7FbFc44zuXg27Z40r2s,210874
 upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
 upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
-upgini/metrics.py,sha256=lWFF_dQAWcgI7EOQlTXiLjsAEoPLxNv1PCp_egoKolc,38821
+upgini/metrics.py,sha256=nT5eIVjGZp1U1oZUE82zBSniI9gaZDf6QhRlGKJkmQ4,39831
 upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.81a3832.dev6.dist-info/METADATA,sha256=WjpXtnU3FUqspcRA2Zl-5iMqo5fqT2xIhHPJXFPcPN4,49172
-upgini-1.2.81a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.81a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.81a3832.dev6.dist-info/RECORD,,
+upgini-1.2.81a3832.dev8.dist-info/METADATA,sha256=KxJ6Hfdlki3UGenMTKIc5cc6-VnE9I34zacRVPn9lws,49172
+upgini-1.2.81a3832.dev8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.81a3832.dev8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.81a3832.dev8.dist-info/RECORD,,

{upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl

Potentially problematic release.

upgini 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl