upgini 1.2.80__py3-none-any.whl → 1.2.81a3832.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +64 -36
- upgini/http.py +21 -21
- upgini/mdc/__init__.py +1 -1
- upgini/metrics.py +141 -128
- upgini/utils/target_utils.py +9 -6
- {upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/METADATA +3 -1
- {upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/RECORD +10 -10
- {upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/WHEEL +0 -0
- {upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
-__version__ = "1.2.80"
+__version__ = "1.2.81a3832.dev2"
upgini/features_enricher.py
CHANGED
@@ -63,7 +63,7 @@ from upgini.metadata import (
     RuntimeParameters,
     SearchKey,
 )
-from upgini.metrics import EstimatorWrapper, validate_scoring_argument
+from upgini.metrics import EstimatorWrapper, define_scorer, validate_scoring_argument
 from upgini.normalizer.normalize_utils import Normalizer
 from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
 from upgini.search_task import SearchTask
@@ -310,6 +310,7 @@ class FeaturesEnricher(TransformerMixin):
             self._search_task = search_task.poll_result(trace_id, quiet=True, check_fit=True)
             file_metadata = self._search_task.get_file_metadata(trace_id)
             x_columns = [c.originalName or c.name for c in file_metadata.columns]
+            self.fit_columns_renaming = {c.name: c.originalName for c in file_metadata.columns}
             df = pd.DataFrame(columns=x_columns)
             self.__prepare_feature_importances(trace_id, df, silent=True)
             # TODO validate search_keys with search_keys from file_metadata
@@ -476,7 +477,7 @@ class FeaturesEnricher(TransformerMixin):
         self.__validate_search_keys(self.search_keys)

         # Validate client estimator params
-        self._get_client_cat_features(estimator, X, self.search_keys)
+        self._get_and_validate_client_cat_features(estimator, X, self.search_keys)

         try:
             self.X = X
@@ -957,9 +958,17 @@ class FeaturesEnricher(TransformerMixin):
                 self.__display_support_link(msg)
                 return None

-            cat_features, search_keys_for_metrics = self._get_client_cat_features(
+            cat_features_from_backend = self.__get_categorical_features()
+            client_cat_features, search_keys_for_metrics = self._get_and_validate_client_cat_features(
                 estimator, validated_X, self.search_keys
             )
+            for cat_feature in cat_features_from_backend:
+                original_cat_feature = self.fit_columns_renaming.get(cat_feature)
+                if original_cat_feature in self.search_keys:
+                    if self.search_keys[original_cat_feature] in [SearchKey.COUNTRY, SearchKey.POSTAL_CODE]:
+                        search_keys_for_metrics.append(original_cat_feature)
+                    else:
+                        self.logger.warning(self.bundle.get("cat_feature_search_key").format(original_cat_feature))
             search_keys_for_metrics.extend([c for c in self.id_columns or [] if c not in search_keys_for_metrics])
             self.logger.info(f"Search keys for metrics: {search_keys_for_metrics}")
@@ -976,7 +985,7 @@ class FeaturesEnricher(TransformerMixin):
                 search_keys_for_metrics=search_keys_for_metrics,
                 progress_bar=progress_bar,
                 progress_callback=progress_callback,
-                cat_features=cat_features,
+                client_cat_features=client_cat_features,
             )
             if prepared_data is None:
                 return None
@@ -994,11 +1003,19 @@ class FeaturesEnricher(TransformerMixin):
             ) = prepared_data

             # rename cat_features
-            if cat_features:
+            if client_cat_features:
                 for new_c, old_c in columns_renaming.items():
-                    if old_c in cat_features:
-                        cat_features.remove(old_c)
-                        cat_features.append(new_c)
+                    if old_c in client_cat_features:
+                        client_cat_features.remove(old_c)
+                        client_cat_features.append(new_c)
+                for cat_feature in client_cat_features:
+                    if cat_feature not in fitting_X.columns:
+                        self.logger.error(
+                            f"Client cat_feature `{cat_feature}` not found in"
+                            f" x columns: {fitting_X.columns.to_list()}"
+                        )
+            else:
+                client_cat_features = []

             gc.collect()
@@ -1019,20 +1036,16 @@ class FeaturesEnricher(TransformerMixin):

             has_date = self._get_date_column(search_keys) is not None
             model_task_type = self.model_task_type or define_task(y_sorted, has_date, self.logger, silent=True)
+            cat_features = list(set(client_cat_features + cat_features_from_backend))
+            baseline_cat_features = [f for f in cat_features if f in fitting_X.columns]
+            enriched_cat_features = [f for f in cat_features if f in fitting_enriched_X.columns]
+            if len(enriched_cat_features) < len(cat_features):
+                missing_cat_features = [f for f in cat_features if f not in fitting_enriched_X.columns]
+                self.logger.warning(
+                    f"Some cat_features were not found in enriched_X: {missing_cat_features}"
+                )

-            wrapper = EstimatorWrapper.create(
-                estimator,
-                self.logger,
-                model_task_type,
-                _cv,
-                fitting_enriched_X,
-                scoring,
-                groups=groups,
-                text_features=text_features,
-                has_date=has_date,
-            )
-            metric = wrapper.metric_name
-            multiplier = wrapper.multiplier
+            _, metric, multiplier = define_scorer(model_task_type, scoring)

             # 1 If client features are presented - fit and predict with KFold estimator
             # on etalon features and calculate baseline metric
@@ -1050,9 +1063,8 @@ class FeaturesEnricher(TransformerMixin):
                     self.logger,
                     model_task_type,
                     _cv,
-                    fitting_X,
-                    scoring,
-                    cat_features,
+                    scoring=scoring,
+                    cat_features=baseline_cat_features,
                     add_params=custom_loss_add_params,
                     groups=groups,
                     text_features=text_features,
@@ -1085,9 +1097,8 @@ class FeaturesEnricher(TransformerMixin):
                     self.logger,
                     model_task_type,
                     _cv,
-                    fitting_enriched_X,
-                    scoring,
-                    cat_features,
+                    scoring=scoring,
+                    cat_features=enriched_cat_features,
                     add_params=custom_loss_add_params,
                     groups=groups,
                     text_features=text_features,
@@ -1420,7 +1431,7 @@ class FeaturesEnricher(TransformerMixin):

         return _cv, groups

-    def _get_client_cat_features(
+    def _get_and_validate_client_cat_features(
         self, estimator: Optional[Any], X: pd.DataFrame, search_keys: Dict[str, SearchKey]
     ) -> Tuple[Optional[List[str]], List[str]]:
         cat_features = None
@@ -1428,12 +1439,20 @@ class FeaturesEnricher(TransformerMixin):
         if (
             estimator is not None
             and hasattr(estimator, "get_param")
+            and hasattr(estimator, "_init_params")
             and estimator.get_param("cat_features") is not None
         ):
-            cat_features = estimator.get_param("cat_features")
-            if all([isinstance(c, int) for c in cat_features]):
-                cat_features = [X.columns[idx] for idx in cat_features]
-        if cat_features:
+            estimator_cat_features = estimator.get_param("cat_features")
+            if all([isinstance(c, int) for c in estimator_cat_features]):
+                cat_features = [X.columns[idx] for idx in estimator_cat_features]
+            elif all([isinstance(c, str) for c in estimator_cat_features]):
+                cat_features = estimator_cat_features
+            else:
+                print(f"WARNING: Unsupported type of cat_features in CatBoost estimator: {estimator_cat_features}")
+
+            del estimator._init_params["cat_features"]
+
+        if cat_features:
             self.logger.info(f"Collected categorical features {cat_features} from user estimator")
             for cat_feature in cat_features:
                 if cat_feature in search_keys:
@@ -1457,7 +1476,7 @@ class FeaturesEnricher(TransformerMixin):
         search_keys_for_metrics: Optional[List[str]] = None,
         progress_bar: Optional[ProgressBar] = None,
         progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
-        cat_features: Optional[List[str]] = None,
+        client_cat_features: Optional[List[str]] = None,
     ):
         is_input_same_as_fit, X, y, eval_set = self._is_input_same_as_fit(X, y, eval_set)
         is_demo_dataset = hash_input(X, y, eval_set) in DEMO_DATASET_HASHES
@@ -1531,7 +1550,7 @@ class FeaturesEnricher(TransformerMixin):

         # Detect and drop high cardinality columns in train
         columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
-        non_excluding_columns = (self.generate_features or []) + (cat_features or [])
+        non_excluding_columns = (self.generate_features or []) + (client_cat_features or [])
         columns_with_high_cardinality = [c for c in columns_with_high_cardinality if c not in non_excluding_columns]
         if len(columns_with_high_cardinality) > 0:
             self.logger.warning(
@@ -2069,10 +2088,12 @@ class FeaturesEnricher(TransformerMixin):
         search_keys: Dict,
         columns_renaming: Dict[str, str],
     ):
+        # X_sampled - with hash-suffixes
+        reversed_renaming = {v: k for k, v in columns_renaming.items()}
         search_keys = {
-            k: v
+            reversed_renaming.get(k, k): v
             for k, v in search_keys.items()
-            if k in X_sampled.columns.to_list()
+            if reversed_renaming.get(k, k) in X_sampled.columns.to_list()
         }
         return FeaturesEnricher._SampledDataForMetrics(
             X_sampled=X_sampled,
@@ -3855,6 +3876,13 @@ if response.status_code == 200:

         return importances

+    def __get_categorical_features(self) -> List[str]:
+        features_meta = self._search_task.get_all_features_metadata_v2()
+        if features_meta is None:
+            raise Exception(self.bundle.get("missing_features_meta"))
+
+        return [f.name for f in features_meta if f.type == "categorical" and f.shap_value > 0.0]
+
     def __prepare_feature_importances(
         self, trace_id: str, df: pd.DataFrame, updated_shaps: Optional[Dict[str, float]] = None, silent=False
     ):
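Net effect of the features_enricher.py changes: categorical features now come from two sources — the client's estimator and the backend's feature metadata — and are merged, then split per dataset before metric calculation. A minimal sketch of that merge-and-split, with invented column names (`city`, `device_type`, `merchant_category` are illustrative, not from a real search task):

import pandas as pd

# Hypothetical inputs: categoricals declared by the client estimator and
# categoricals reported by the backend with a positive SHAP value.
client_cat_features = ["city", "device_type"]
backend_cat_features = ["city", "merchant_category"]

fitting_X = pd.DataFrame(columns=["city", "device_type", "amount"])
fitting_enriched_X = pd.DataFrame(columns=["city", "device_type", "amount", "merchant_category"])

# Union of both sources, then split per dataset, mirroring the hunks above:
cat_features = list(set(client_cat_features + backend_cat_features))
baseline_cat_features = [f for f in cat_features if f in fitting_X.columns]
enriched_cat_features = [f for f in cat_features if f in fitting_enriched_X.columns]

print(sorted(baseline_cat_features))   # ['city', 'device_type']
print(sorted(enriched_cat_features))   # ['city', 'device_type', 'merchant_category']

Each list is then handed to the matching EstimatorWrapper (baseline vs. enriched), which is why the two `EstimatorWrapper.create` call sites above pass different `cat_features=` arguments.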
upgini/http.py
CHANGED
@@ -20,7 +20,7 @@ import jwt
 # import pandas as pd
 import requests
 from pydantic import BaseModel
-from pythonjsonlogger import jsonlogger
+from pythonjsonlogger import json as jsonlogger
 from requests.exceptions import RequestException

 from upgini.__about__ import __version__
@@ -459,19 +459,19 @@ class _RestClient:
                 content = file.read()
                 md5_hash.update(content)
             digest = md5_hash.hexdigest()
-            metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
+            metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})

             # digest_sha256 = hashlib.sha256(
             #     pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
             # ).hexdigest()
             digest_sha256 = self.compute_file_digest(file_path)
-            metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
+            metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})

             with open(file_path, "rb") as file:
                 files = {
                     "metadata": (
                         "metadata.json",
-                        metadata_with_md5.json(exclude_none=True).encode(),
+                        metadata_with_md5.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "tracking": (
@@ -481,7 +481,7 @@ class _RestClient:
                     ),
                     "metrics": (
                         "metrics.json",
-                        metrics.json(exclude_none=True).encode(),
+                        metrics.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "file": (metadata_with_md5.name, file, "application/octet-stream"),
@@ -489,7 +489,7 @@ class _RestClient:
                 if search_customization is not None:
                     files["customization"] = (
                         "customization.json",
-                        search_customization.json(exclude_none=True).encode(),
+                        search_customization.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     )
                 additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
@@ -504,7 +504,7 @@ class _RestClient:
     def check_uploaded_file_v2(self, trace_id: str, file_upload_id: str, metadata: FileMetadata) -> bool:
         api_path = self.CHECK_UPLOADED_FILE_URL_FMT_V2.format(file_upload_id)
         response = self._with_unauth_retry(
-            lambda: self._send_post_req(api_path, trace_id, metadata.json(exclude_none=True))
+            lambda: self._send_post_req(api_path, trace_id, metadata.model_dump_json(exclude_none=True))
         )
         return bool(response)
@@ -518,11 +518,11 @@ class _RestClient:
     ) -> SearchTaskResponse:
         api_path = self.INITIAL_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id)
         files = {
-            "metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
-            "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+            "metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
+            "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
         }
         if search_customization is not None:
-            files["customization"] = search_customization.json(exclude_none=True).encode()
+            files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
         additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
         response = self._with_unauth_retry(
             lambda: self._send_post_file_req_v2(
@@ -548,19 +548,19 @@ class _RestClient:
                 content = file.read()
                 md5_hash.update(content)
             digest = md5_hash.hexdigest()
-            metadata_with_md5 = metadata.copy(update={"checksumMD5": digest})
+            metadata_with_md5 = metadata.model_copy(update={"checksumMD5": digest})

             # digest_sha256 = hashlib.sha256(
             #     pd.util.hash_pandas_object(pd.read_parquet(file_path, engine="fastparquet")).values
             # ).hexdigest()
             digest_sha256 = self.compute_file_digest(file_path)
-            metadata_with_md5 = metadata_with_md5.copy(update={"digest": digest_sha256})
+            metadata_with_md5 = metadata_with_md5.model_copy(update={"digest": digest_sha256})

             with open(file_path, "rb") as file:
                 files = {
                     "metadata": (
                         "metadata.json",
-                        metadata_with_md5.json(exclude_none=True).encode(),
+                        metadata_with_md5.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "tracking": (
@@ -570,7 +570,7 @@ class _RestClient:
                     ),
                     "metrics": (
                         "metrics.json",
-                        metrics.json(exclude_none=True).encode(),
+                        metrics.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     ),
                     "file": (metadata_with_md5.name, file, "application/octet-stream"),
@@ -578,7 +578,7 @@ class _RestClient:
                 if search_customization is not None:
                     files["customization"] = (
                         "customization.json",
-                        search_customization.json(exclude_none=True).encode(),
+                        search_customization.model_dump_json(exclude_none=True).encode(),
                         "application/json",
                     )

@@ -602,11 +602,11 @@ class _RestClient:
     ) -> SearchTaskResponse:
         api_path = self.VALIDATION_SEARCH_WITHOUT_UPLOAD_URI_FMT_V2.format(file_upload_id, initial_search_task_id)
         files = {
-            "metadata": ("metadata.json", metadata.json(exclude_none=True).encode(), "application/json"),
-            "metrics": ("metrics.json", metrics.json(exclude_none=True).encode(), "application/json"),
+            "metadata": ("metadata.json", metadata.model_dump_json(exclude_none=True).encode(), "application/json"),
+            "metrics": ("metrics.json", metrics.model_dump_json(exclude_none=True).encode(), "application/json"),
         }
         if search_customization is not None:
-            files["customization"] = search_customization.json(exclude_none=True).encode()
+            files["customization"] = search_customization.model_dump_json(exclude_none=True).encode()
         additional_headers = {self.SEARCH_KEYS_HEADER_NAME: ",".join(self.search_keys_meaning_types(metadata))}
         response = self._with_unauth_retry(
             lambda: self._send_post_file_req_v2(
@@ -670,7 +670,7 @@ class _RestClient:
                 "file": (metadata.name, file, "application/octet-stream"),
                 "metadata": (
                     "metadata.json",
-                    metadata.json(exclude_none=True).encode(),
+                    metadata.model_dump_json(exclude_none=True).encode(),
                     "application/json",
                 ),
             }
@@ -682,12 +682,12 @@ class _RestClient:
     def get_search_file_metadata(self, search_task_id: str, trace_id: str) -> FileMetadata:
         api_path = self.SEARCH_FILE_METADATA_URI_FMT_V2.format(search_task_id)
         response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
-        return FileMetadata.parse_obj(response)
+        return FileMetadata.model_validate(response)

     def get_provider_search_metadata_v3(self, provider_search_task_id: str, trace_id: str) -> ProviderTaskMetadataV2:
         api_path = self.SEARCH_TASK_METADATA_FMT_V3.format(provider_search_task_id)
         response = self._with_unauth_retry(lambda: self._send_get_req(api_path, trace_id))
-        return ProviderTaskMetadataV2.parse_obj(response)
+        return ProviderTaskMetadataV2.model_validate(response)

     def get_current_transform_usage(self, trace_id) -> TransformUsage:
         track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)
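All http.py changes are one mechanical migration: Pydantic v1 calls replaced by their v2 equivalents (`.copy()` → `model_copy()`, `.json()` → `model_dump_json()`, `.parse_obj()` → `model_validate()`). A self-contained sketch of the same before/after pattern on a toy model (the fields here are invented; the real `FileMetadata` lives in upgini/metadata.py):

from typing import Optional
from pydantic import BaseModel

class FileMetadata(BaseModel):
    # Illustrative fields only.
    name: str
    checksumMD5: Optional[str] = None

meta = FileMetadata(name="train.parquet")

# Pydantic v1 style (removed by this diff):
#   meta.copy(update={"checksumMD5": "abc"})
#   meta.json(exclude_none=True)
#   FileMetadata.parse_obj({"name": "train.parquet"})

# Pydantic v2 style (added by this diff):
meta2 = meta.model_copy(update={"checksumMD5": "abc"})
payload = meta2.model_dump_json(exclude_none=True)
roundtrip = FileMetadata.model_validate({"name": "train.parquet"})

The `from pythonjsonlogger import json as jsonlogger` rewrite is the matching update for python-json-logger 3.x, which moved `JsonFormatter` into the `json` submodule while keeping the old attribute name usable via the alias.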
upgini/mdc/__init__.py
CHANGED
upgini/metrics.py
CHANGED
@@ -11,15 +11,16 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import lightgbm as lgb
 import numpy as np
 import pandas as pd
+from catboost import CatBoostClassifier, CatBoostRegressor
+from category_encoders.cat_boost import CatBoostEncoder
 from lightgbm import LGBMClassifier, LGBMRegressor
 from numpy import log1p
 from pandas.api.types import is_numeric_dtype
 from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
-from sklearn.preprocessing import OrdinalEncoder

+# from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
 from upgini.utils.features_validator import FeaturesValidator
 from upgini.utils.sklearn_ext import cross_validate
-from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit

 try:
     from sklearn.metrics import get_scorer_names
@@ -31,12 +32,12 @@ except ImportError:
     available_scorers = SCORERS
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
-from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
+from sklearn.model_selection import BaseCrossValidator  # , TimeSeriesSplit

 from upgini.errors import ValidationError
 from upgini.metadata import ModelTaskType
 from upgini.resource_bundle import bundle
-from upgini.utils.target_utils import correct_string_target
+from upgini.utils.target_utils import prepare_target

 DEFAULT_RANDOM_STATE = 42
@@ -287,6 +288,7 @@ class EstimatorWrapper:
         self,
         estimator,
         scorer: Callable,
+        cat_features: Optional[List[str]],
         metric_name: str,
         multiplier: int,
         cv: BaseCrossValidator,
@@ -298,9 +300,8 @@ class EstimatorWrapper:
     ):
         self.estimator = estimator
         self.scorer = scorer
-        self.metric_name = (
-            "GINI" if metric_name.upper() == "ROC_AUC" and target_type == ModelTaskType.BINARY else metric_name
-        )
+        self.cat_features = cat_features
+        self.metric_name = metric_name
         self.multiplier = multiplier
         self.cv = cv
         self.target_type = target_type
@@ -328,10 +329,14 @@ class EstimatorWrapper:
     ) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
         self.logger.info(f"Before preparing data columns: {x.columns.to_list()}")
         for c in x.columns:
-            if is_numeric_dtype(x[c]):
-                x[c] = x[c].astype(float)
-            elif not x[c].dtype == "category":
-                x[c] = x[c].astype(str)
+            if c not in self.cat_features:
+                if is_numeric_dtype(x[c]):
+                    x[c] = x[c].astype(float)
+                elif not x[c].dtype == "category":
+                    x[c] = x[c].astype(str)
+            else:
+                if x[c].dtype == "category" and x[c].cat.categories.dtype == np.int64:
+                    x[c] = x[c].astype(np.int64)

         if not isinstance(y, pd.Series):
             raise Exception(bundle.get("metrics_unsupported_target_type").format(type(y)))
@@ -345,6 +350,8 @@ class EstimatorWrapper:
         else:
             x, y = self._remove_empty_target_rows(x, y)

+        y = prepare_target(y, self.target_type)
+
         self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
         return x, y, groups
@@ -409,7 +416,6 @@ class EstimatorWrapper:
             shaps = self.calculate_shap(cv_x, cv_y, estimator)
             if shaps is not None:
                 for feature, shap_value in shaps.items():
-                    # shap_values_all_folds[feature] = shap_values_all_folds.get(feature, []) + shap_value.tolist()
                     shap_values_all_folds[feature].append(shap_value)

         if shap_values_all_folds:
@@ -465,7 +471,7 @@ class EstimatorWrapper:
         logger: logging.Logger,
         target_type: ModelTaskType,
         cv: BaseCrossValidator,
-        x: pd.DataFrame,
+        *,
         scoring: Union[Callable, str, None] = None,
         cat_features: Optional[List[str]] = None,
         text_features: Optional[List[str]] = None,
@@ -473,9 +479,10 @@ class EstimatorWrapper:
         groups: Optional[List[str]] = None,
         has_date: Optional[bool] = None,
     ) -> EstimatorWrapper:
-        scorer, metric_name, multiplier = _get_scorer(target_type, scoring)
+        scorer, metric_name, multiplier = define_scorer(target_type, scoring)
         kwargs = {
             "scorer": scorer,
+            "cat_features": cat_features,
             "metric_name": metric_name,
             "multiplier": multiplier,
             "cv": cv,
@@ -485,20 +492,29 @@ class EstimatorWrapper:
             "logger": logger,
         }
         if estimator is None:
-            params = {"random_state": DEFAULT_RANDOM_STATE}
+            params = {"has_time": has_date}
             if target_type == ModelTaskType.MULTICLASS:
-                params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
+                params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
                 params = _get_add_params(params, add_params)
-                estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
+                estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
+                # params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
+                # params = _get_add_params(params, add_params)
+                # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
             elif target_type == ModelTaskType.BINARY:
-                params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
+                params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
                 params = _get_add_params(params, add_params)
-                estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
+                estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
+                # params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
+                # params = _get_add_params(params, add_params)
+                # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
             elif target_type == ModelTaskType.REGRESSION:
-                if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
-                    params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
+                params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
                 params = _get_add_params(params, add_params)
-                estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
+                estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
+                # if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
+                #     params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
+                # params = _get_add_params(params, add_params)
+                # estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
             else:
                 raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
         else:
@@ -509,18 +525,11 @@ class EstimatorWrapper:
             kwargs["estimator"] = estimator_copy
             if is_catboost_estimator(estimator):
                 if cat_features is not None:
-                    for cat_feature in cat_features:
-                        if cat_feature not in x.columns:
-                            logger.error(
-                                f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
-                            )
                     estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
                 estimator = CatBoostWrapper(**kwargs)
             else:
                 if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
                     estimator = LightGBMWrapper(**kwargs)
-                elif is_catboost_estimator(estimator):
-                    estimator = CatBoostWrapper(**kwargs)
                 else:
                     logger.warning(
                         f"Unexpected estimator is used for metrics: {estimator}. "
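Together with the previous hunk, this flips the default metrics estimator from LightGBM to CatBoost, keeping the old LightGBM branches as comments. A hedged sketch of what the new default instantiation amounts to — the parameter values below are illustrative stand-ins, not the shipped CATBOOST_BINARY_PARAMS:

from catboost import CatBoostClassifier

# Illustrative stand-ins; the real defaults live in upgini/metrics.py.
params = {
    "has_time": False,   # set from has_date, as in the diff
    "iterations": 300,
    "random_state": 42,
    "verbose": False,
}

estimator = CatBoostClassifier(**params)
# EstimatorWrapper.create(...) then wraps this in CatBoostWrapper, passing
# cat_features through kwargs instead of the removed positional x/scoring.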
@@ -536,6 +545,7 @@ class CatBoostWrapper(EstimatorWrapper):
         self,
         estimator,
         scorer: Callable,
+        cat_features: Optional[List[str]],
         metric_name: str,
         multiplier: int,
         cv: BaseCrossValidator,
@@ -547,6 +557,7 @@ class CatBoostWrapper(EstimatorWrapper):
         super(CatBoostWrapper, self).__init__(
             estimator,
             scorer,
+            cat_features,
             metric_name,
             multiplier,
             cv,
@@ -555,10 +566,10 @@ class CatBoostWrapper(EstimatorWrapper):
             text_features=text_features,
             logger=logger,
         )
-        self.cat_features = None
         self.emb_features = None
         self.grouped_embedding_features = None
-        self.exclude_features = []
+        self.drop_cat_features = []
+        self.features_to_encode = []

     def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, dict]:
         x, y, groups, params = super()._prepare_to_fit(x, y)
@@ -595,37 +606,16 @@ class CatBoostWrapper(EstimatorWrapper):
                 self.logger.warning(f"Text features are not supported by this Catboost version {catboost.__version__}")

             # Find rest categorical features
-            self.cat_features = _get_cat_features(x, self.text_features, self.grouped_embedding_features)
-            x = fill_na_cat_features(x, self.cat_features)
-            unique_cat_features = []
-            for name in self.cat_features:
-                # Remove constant categorical features
-                if x[name].nunique() > 1:
-                    unique_cat_features.append(name)
-                else:
-                    self.logger.info(f"Drop column {name} on preparing data for fit")
-                    x = x.drop(columns=name)
-                    self.exclude_features.append(name)
-            self.cat_features = unique_cat_features
-            if (
-                hasattr(self.estimator, "get_param")
-                and hasattr(self.estimator, "_init_params")
-                and self.estimator.get_param("cat_features") is not None
-            ):
-                estimator_cat_features = self.estimator.get_param("cat_features")
-                if all([isinstance(c, int) for c in estimator_cat_features]):
-                    cat_features_idx = {x.columns.get_loc(c) for c in self.cat_features}
-                    cat_features_idx.update(estimator_cat_features)
-                    self.cat_features = [x.columns[idx] for idx in cat_features_idx]
-                elif all([isinstance(c, str) for c in estimator_cat_features]):
-                    self.cat_features = list(set(self.cat_features + estimator_cat_features))
-                else:
-                    print(f"WARNING: Unsupported type of cat_features in CatBoost estimator: {estimator_cat_features}")
-
-                del self.estimator._init_params["cat_features"]
-
-            self.logger.info(f"Selected categorical features: {self.cat_features}")
-            params["cat_features"] = self.cat_features
+            self.cat_features, self.features_to_encode, self.exclude_features = _get_cat_features(
+                self.logger, x, self.cat_features, self.text_features, self.grouped_embedding_features
+            )
+            if self.features_to_encode:
+                for c in self.features_to_encode:
+                    if is_numeric_dtype(x[c]):
+                        x[c] = x[c].fillna(np.nan)
+                    else:
+                        x[c] = x[c].fillna("NA")
+            params["cat_features"] = self.features_to_encode

             return x, y, groups, params
@@ -654,9 +644,14 @@ class CatBoostWrapper(EstimatorWrapper):
         if self.grouped_embedding_features:
             x, emb_columns = self.group_embeddings(x)
             params["embedding_features"] = emb_columns
-        if self.cat_features:
-            x = fill_na_cat_features(x, self.cat_features)
-            params["cat_features"] = self.cat_features
+
+        if self.features_to_encode:
+            for c in self.features_to_encode:
+                if is_numeric_dtype(x[c]):
+                    x[c] = x[c].fillna(np.nan)
+                else:
+                    x[c] = x[c].fillna("NA")
+        params["cat_features"] = self.features_to_encode

         return x, y, params
@@ -700,23 +695,29 @@ class CatBoostWrapper(EstimatorWrapper):
                 embedding_features=self.grouped_embedding_features,
             )

-            # Get SHAP values of current estimator
-            shap_values_fold = estimator.get_feature_importance(data=fold_pool, type="ShapValues")
+            shap_values = estimator.get_feature_importance(data=fold_pool, type="ShapValues")

-            # Remove last columns (base value) and flatten
             if self.target_type == ModelTaskType.MULTICLASS:
-                all_shaps = shap_values_fold[:, :, :-1]
-                all_shaps = all_shaps.reshape(-1, all_shaps.shape[-1])
+                # For multiclass, shap_values has shape (n_samples, n_classes, n_features + 1)
+                # Last column is bias term
+                shap_values = shap_values[:, :, :-1]  # Remove bias term
+                # Average SHAP values across classes
+                shap_values = np.mean(np.abs(shap_values), axis=1)
             else:
-                all_shaps = shap_values_fold[:, :-1]
-                all_shaps = np.abs(all_shaps)
+                # For binary/regression, shap_values has shape (n_samples, n_features + 1)
+                # Last column is bias term
+                shap_values = shap_values[:, :-1]  # Remove bias term
+                # Take absolute values
+                shap_values = np.abs(shap_values)

-            mean_abs_shap = np.mean(all_shaps, axis=0)
+            feature_importance = {}
+            for i, col in enumerate(x.columns):
+                feature_importance[col] = np.mean(np.abs(shap_values[:, i]))

-            return dict(zip(x.columns, mean_abs_shap))
+            return feature_importance

-        except Exception:
-            self.logger.exception("Failed to recalculate new SHAP values")
+        except Exception as e:
+            self.logger.exception(f"Failed to recalculate new SHAP values: {str(e)}")
             return None
@@ -725,6 +726,7 @@ class LightGBMWrapper(EstimatorWrapper):
         self,
         estimator,
         scorer: Callable,
+        cat_features: Optional[List[str]],
         metric_name: str,
         multiplier: int,
         cv: BaseCrossValidator,
@@ -736,6 +738,7 @@ class LightGBMWrapper(EstimatorWrapper):
         super(LightGBMWrapper, self).__init__(
             estimator,
             scorer,
+            cat_features,
             metric_name,
             multiplier,
             cv,
@@ -744,9 +747,10 @@ class LightGBMWrapper(EstimatorWrapper):
             text_features=text_features,
             logger=logger,
         )
-        self.cat_features = None
         self.cat_encoder = None
         self.n_classes = None
+        self.exclude_features = []
+        self.features_to_encode = []

     def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
         x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
@@ -756,30 +760,25 @@ class LightGBMWrapper(EstimatorWrapper):
         if self.target_type == ModelTaskType.BINARY:
             params["eval_metric"] = "auc"
             params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
-        self.cat_features = _get_cat_features(x)
-        if self.cat_features:
-            params["categorical_feature"] = self.cat_features
-            encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
-            encoded = pd.DataFrame(
-                encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
-            )
-            x[self.cat_features] = encoded
+        self.cat_features, self.features_to_encode, self.exclude_features = _get_cat_features(
+            self.logger, x, self.cat_features
+        )
+        if self.features_to_encode:
+            encoder = CatBoostEncoder(random_state=DEFAULT_RANDOM_STATE, return_df=True)
+            encoded = encoder.fit_transform(x[self.features_to_encode].astype("object"), y_numpy).astype("category")
+            x[self.features_to_encode] = encoded
             self.cat_encoder = encoder
-        if not is_numeric_dtype(y_numpy):
-            y_numpy = correct_string_target(y_numpy)

         return x, y_numpy, groups, params

     def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
+        if self.exclude_features:
+            x = x.drop(columns=self.exclude_features)
         x, y_numpy, params = super()._prepare_to_calculate(x, y)
-        if self.cat_features is not None:
-            params["categorical_feature"] = self.cat_features
-            if self.cat_encoder is not None:
-                x[self.cat_features] = pd.DataFrame(
-                    self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
-                )
-        if not is_numeric_dtype(y):
-            y_numpy = correct_string_target(y_numpy)
+        if self.features_to_encode is not None and self.cat_encoder is not None:
+            x[self.features_to_encode] = self.cat_encoder.transform(x[self.features_to_encode].astype("object")).astype(
+                "category"
+            )
         return x, y_numpy, params

     def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
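The wrappers now target-encode leftover string categoricals with category_encoders' CatBoostEncoder (fit once with the target, reuse at scoring time) instead of an OrdinalEncoder. A small self-contained sketch of that fit/transform pattern on toy data (column and label values are invented):

import pandas as pd
from category_encoders.cat_boost import CatBoostEncoder

x = pd.DataFrame({"color": ["red", "blue", "red", "green"]})
y = pd.Series([1, 0, 1, 0])

# Same pattern as the diff: fit on object-typed columns with the target,
# then keep the fitted encoder for reuse at scoring time.
encoder = CatBoostEncoder(random_state=42, return_df=True)
x[["color"]] = encoder.fit_transform(x[["color"]].astype("object"), y).astype("category")

# Later, on new data, only transform (no refit), as _prepare_to_calculate does:
x_new = pd.DataFrame({"color": ["blue", "green"]})
x_new[["color"]] = encoder.transform(x_new[["color"]].astype("object")).astype("category")

Unlike an ordinal encoding, the CatBoost encoding carries target statistics, so the encoded column remains informative for gradient-boosting models without exploding cardinality.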
@@ -805,20 +804,6 @@ class LightGBMWrapper(EstimatorWrapper):
             for i, col in enumerate(x.columns):
                 feature_importance[col] = np.mean(np.abs(shap_matrix[:, i]))

-            # # exclude last column (base value)
-            # shap_values_only = shap_values[:, :-1]
-            # mean_abs_shap = np.mean(np.abs(shap_values_only), axis=0)
-
-            # # For classification, shap_values is returned as a list for each class
-            # # Take values for the positive class
-            # if isinstance(shap_values, list):
-            #     shap_values = shap_values[1]
-
-            # # Calculate mean absolute SHAP value for each feature
-            # feature_importance = {}
-            # for i, col in enumerate(x.columns):
-            #     feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
-
             return feature_importance

         except Exception as e:
@@ -831,6 +816,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
         self,
         estimator,
         scorer: Callable,
+        cat_features: Optional[List[str]],
         metric_name: str,
         multiplier: int,
         cv: BaseCrossValidator,
@@ -842,6 +828,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
         super(OtherEstimatorWrapper, self).__init__(
             estimator,
             scorer,
+            cat_features,
             metric_name,
             multiplier,
             cv,
@@ -850,32 +837,32 @@ class OtherEstimatorWrapper(EstimatorWrapper):
             text_features=text_features,
             logger=logger,
         )
-        self.cat_features = None

     def _prepare_to_fit(self, x: pd.DataFrame, y: np.ndarray) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, dict]:
-        x, y, groups, params = super()._prepare_to_fit(x, y)
-        self.cat_features = _get_cat_features(x)
+        x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
+        self.cat_features, self.features_to_encode, self.exclude_features = _get_cat_features(
+            self.logger, x, self.cat_features
+        )
         num_features = [col for col in x.columns if col not in self.cat_features]
         x[num_features] = x[num_features].fillna(-999)
-        if self.cat_features:
-            # TODO use one-hot encoding if cardinality is less 50
-            for feature in self.cat_features:
-                x[feature] = x[feature].astype("category").cat.codes
-        if not is_numeric_dtype(y):
-            y = correct_string_target(y)
-        return x, y, groups, params
+        if self.cat_features:
+            encoder = CatBoostEncoder(random_state=DEFAULT_RANDOM_STATE, return_df=True)
+            encoded = encoder.fit_transform(x[self.cat_features].astype("object"), y_numpy).astype("category")
+            x[self.cat_features] = encoded
+            self.cat_encoder = encoder
+        return x, y_numpy, groups, params

     def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
+        if self.exclude_features:
+            x = x.drop(columns=self.exclude_features)
         x, y, params = super()._prepare_to_calculate(x, y)
         if self.cat_features is not None:
             num_features = [col for col in x.columns if col not in self.cat_features]
             x[num_features] = x[num_features].fillna(-999)
-            # TODO use one-hot encoding if cardinality is less 50
-            for feature in self.cat_features:
-                x[feature] = x[feature].astype("category").cat.codes
-
-            if not is_numeric_dtype(y):
-                y = correct_string_target(y)
+            if self.features_to_encode and self.cat_encoder is not None:
+                x[self.features_to_encode] = self.cat_encoder.transform(
+                    x[self.features_to_encode].astype("object")
+                ).astype("category")
         return x, y, params
@@ -938,7 +925,7 @@ def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
     return scoring, metric_name, multiplier


-def _get_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None]) -> Tuple[Callable, str, int]:
+def define_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None]) -> Tuple[Callable, str, int]:
     if scoring is None:
         if target_type == ModelTaskType.BINARY:
             scoring = "roc_auc"
@@ -957,16 +944,42 @@ def _get_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None])
     else:
         metric_name = str(scoring)

+    metric_name = "GINI" if metric_name.upper() == "ROC_AUC" and target_type == ModelTaskType.BINARY else metric_name
+
     return scoring, metric_name, multiplier


 def _get_cat_features(
-    x: pd.DataFrame, text_features: Optional[List[str]] = None, emb_features: Optional[List[str]] = None
+    logger: logging.Logger,
+    x: pd.DataFrame,
+    cat_features: Optional[List[str]],
+    text_features: Optional[List[str]] = None,
+    emb_features: Optional[List[str]] = None,
 ) -> List[str]:
+    cat_features = cat_features or []
     text_features = text_features or []
     emb_features = emb_features or []
     exclude_features = text_features + emb_features
-    return [c for c in x.columns if c not in exclude_features and not is_numeric_dtype(x[c])]
+    cat_features = [c for c in cat_features if c not in exclude_features]
+    unique_cat_features = []
+    drop_cat_features = []
+    for name in cat_features:
+        # Remove constant categorical features
+        if x[name].nunique() > 1:
+            unique_cat_features.append(name)
+        else:
+            logger.info(f"Drop column {name} on preparing data for fit")
+            x = x.drop(columns=name)
+            drop_cat_features.append(name)
+    cat_features = unique_cat_features
+
+    logger.info(f"Selected categorical features: {cat_features}")
+
+    features_to_encode = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
+
+    logger.info(f"Features to encode: {features_to_encode}")
+
+    return cat_features, features_to_encode, drop_cat_features


 def _get_add_params(input_params, add_params):
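Factoring `_get_scorer` out into `define_scorer` also moves the GINI renaming out of `EstimatorWrapper.__init__`: the scorer, its display name, and the sign multiplier are now produced by one call. A quick illustration of the contract, assuming the default scoring resolution shown in the hunks above:

from upgini.metadata import ModelTaskType
from upgini.metrics import define_scorer

# Default scoring for a binary task resolves to roc_auc and is reported as GINI.
scorer, metric_name, multiplier = define_scorer(ModelTaskType.BINARY, None)
print(metric_name)  # "GINI"

# Loss-style metrics keep their name; the multiplier flips the sign so that
# higher is always better, e.g. define_scorer(ModelTaskType.REGRESSION, "mean_absolute_error").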
upgini/utils/target_utils.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Callable, List, Optional, Union

 import numpy as np
 import pandas as pd
-from pandas.api.types import is_bool_dtype, is_numeric_dtype
+from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype

 from upgini.errors import ValidationError
 from upgini.metadata import SYSTEM_RECORD_ID, CVType, ModelTaskType
@@ -14,11 +14,14 @@ from upgini.utils.ts_utils import get_most_frequent_time_unit, trunc_datetime
 TS_MIN_DIFFERENT_IDS_RATIO = 0.2


-def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
-    if isinstance(y, pd.Series):
-        return y.astype(str).astype("category").cat.codes
-    elif isinstance(y, np.ndarray):
-        return pd.Series(y).astype(str).astype("category").cat.codes.values
+def prepare_target(y: Union[pd.Series, np.ndarray], target_type: ModelTaskType) -> Union[pd.Series, np.ndarray]:
+    if target_type != ModelTaskType.REGRESSION or (not is_numeric_dtype(y) and not is_datetime64_any_dtype(y)):
+        if isinstance(y, pd.Series):
+            y = y.astype(str).astype("category").cat.codes
+        elif isinstance(y, np.ndarray):
+            y = pd.Series(y).astype(str).astype("category").cat.codes.values
+
+    return y


 def define_task(
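`prepare_target` generalizes the removed `correct_string_target`: any non-regression target, and any non-numeric, non-datetime regression target, is label-encoded through pandas category codes. A short demonstration of what that encoding does (toy labels):

import pandas as pd

# What prepare_target does to a string classification target:
# category codes assign a stable integer per distinct label.
y = pd.Series(["cat", "dog", "cat", "bird"])
encoded = y.astype(str).astype("category").cat.codes
print(encoded.tolist())  # [1, 2, 1, 0] - codes follow lexicographic category order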
{upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.80
+Version: 1.2.81a3832.dev2
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -22,6 +22,8 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Requires-Python: <3.12,>=3.10
+Requires-Dist: catboost>=1.2.8
+Requires-Dist: category-encoders>=2.8.1
 Requires-Dist: fastparquet>=0.8.1
 Requires-Dist: ipywidgets>=8.1.0
 Requires-Dist: jarowinkler>=2.0.0
{upgini-1.2.80.dist-info → upgini-1.2.81a3832.dev2.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=
+upgini/__about__.py,sha256=7ytM9g8DI6H-u5aMwPu2Qxa34E_K8afMwp4RaWapTSw,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=
-upgini/http.py,sha256=
+upgini/features_enricher.py,sha256=WiSVfmlHI9oKJQbyf46FH0yY80hBJ6hheFpugw0f_vE,210583
+upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
 upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
-upgini/metrics.py,sha256=
+upgini/metrics.py,sha256=KxtcjiClNDNlMWpoCbAvVPveC59Nz7z2lA4b-hQozRE,39608
 upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -32,7 +32,7 @@ upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9e
 upgini/autofe/timeseries/volatility.py,sha256=9shUmIKjpWTHVYjj80YBsk0XheBJ9uBuLv5NW9Mchnk,7953
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
-upgini/mdc/__init__.py,sha256=
+upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
 upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
 upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
 upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
 upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
-upgini/utils/target_utils.py,sha256=
+upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,16832
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.80.dist-info/METADATA,sha256=
-upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.80.dist-info/RECORD,,
+upgini-1.2.81a3832.dev2.dist-info/METADATA,sha256=Kdxh014FUNln4eeF-RflHu3c_pfvPXpsoXfvb6SBneE,49172
+upgini-1.2.81a3832.dev2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.81a3832.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.81a3832.dev2.dist-info/RECORD,,
File without changes
|
File without changes
|