upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +1 -2
- upgini/metrics.py +15 -49
- upgini/utils/target_utils.py +2 -2
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/METADATA +1 -1
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/RECORD +8 -8
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/WHEEL +0 -0
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/licenses/LICENSE +0 -0
    
        upgini/__about__.py
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            __version__ = "1.2.71a3832.dev13"
         | 
| 1 | 
            +
            __version__ = "1.2.72"
         | 
    
        upgini/features_enricher.py
    CHANGED
    
    | @@ -3250,8 +3250,7 @@ if response.status_code == 200: | |
| 3250 3250 | 
             
                def _validate_eval_set_pair(self, X: pd.DataFrame, eval_pair: Tuple) -> Tuple[pd.DataFrame, pd.Series]:
         | 
| 3251 3251 | 
             
                    if len(eval_pair) != 2:
         | 
| 3252 3252 | 
             
                        raise ValidationError(self.bundle.get("eval_set_invalid_tuple_size").format(len(eval_pair)))
         | 
| 3253 | 
            -
                    eval_X = eval_pair[0]
         | 
| 3254 | 
            -
                    eval_y = eval_pair[1]
         | 
| 3253 | 
            +
                    eval_X, eval_y = eval_pair
         | 
| 3255 3254 |  | 
| 3256 3255 | 
             
                    if _num_samples(eval_X) == 0:
         | 
| 3257 3256 | 
             
                        raise ValidationError(self.bundle.get("eval_x_is_empty"))
         | 
    
        upgini/metrics.py
    CHANGED
    
    | @@ -8,18 +8,18 @@ from copy import deepcopy | |
| 8 8 | 
             
            from dataclasses import dataclass
         | 
| 9 9 | 
             
            from typing import Any, Callable, Dict, List, Optional, Tuple, Union
         | 
| 10 10 |  | 
| 11 | 
            +
            import lightgbm as lgb
         | 
| 11 12 | 
             
            import numpy as np
         | 
| 12 13 | 
             
            import pandas as pd
         | 
| 13 14 | 
             
            from lightgbm import LGBMClassifier, LGBMRegressor
         | 
| 14 | 
            -
            import lightgbm as lgb
         | 
| 15 15 | 
             
            from numpy import log1p
         | 
| 16 16 | 
             
            from pandas.api.types import is_numeric_dtype
         | 
| 17 | 
            -
            # from sklearn.calibration import LabelEncoder
         | 
| 18 17 | 
             
            from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
         | 
| 19 18 | 
             
            from sklearn.preprocessing import OrdinalEncoder
         | 
| 20 19 |  | 
| 21 20 | 
             
            from upgini.utils.features_validator import FeaturesValidator
         | 
| 22 21 | 
             
            from upgini.utils.sklearn_ext import cross_validate
         | 
| 22 | 
            +
            from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
         | 
| 23 23 |  | 
| 24 24 | 
             
            try:
         | 
| 25 25 | 
             
                from sklearn.metrics import get_scorer_names
         | 
| @@ -31,7 +31,7 @@ except ImportError: | |
| 31 31 | 
             
                available_scorers = SCORERS
         | 
| 32 32 | 
             
            from sklearn.metrics import mean_squared_error
         | 
| 33 33 | 
             
            from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
         | 
| 34 | 
            -
            from sklearn.model_selection import BaseCrossValidator
         | 
| 34 | 
            +
            from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
         | 
| 35 35 |  | 
| 36 36 | 
             
            from upgini.errors import ValidationError
         | 
| 37 37 | 
             
            from upgini.metadata import ModelTaskType
         | 
| @@ -85,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = { | |
| 85 85 | 
             
                "auto_class_weights": "Balanced",
         | 
| 86 86 | 
             
            }
         | 
| 87 87 |  | 
| 88 | 
            -
            LIGHTGBM_PARAMS = {
         | 
| 89 | 
            -
                "random_state": DEFAULT_RANDOM_STATE,
         | 
| 90 | 
            -
                # "num_leaves": 16,
         | 
| 91 | 
            -
                # "n_estimators": 150,
         | 
| 92 | 
            -
                # "min_child_weight": 1,
         | 
| 93 | 
            -
                "max_depth": 4,
         | 
| 94 | 
            -
                "max_cat_threshold": 80,
         | 
| 95 | 
            -
                "min_data_per_group": 25,
         | 
| 96 | 
            -
                "num_boost_round": 150,
         | 
| 97 | 
            -
                "cat_l2": 10,
         | 
| 98 | 
            -
                "cat_smooth": 12,
         | 
| 99 | 
            -
                "learning_rate": 0.05,
         | 
| 100 | 
            -
                "feature_fraction": 1.0,
         | 
| 101 | 
            -
                "min_sum_hessian_in_leaf": 0.01,
         | 
| 102 | 
            -
            }
         | 
| 103 | 
            -
             | 
| 104 88 | 
             
            LIGHTGBM_REGRESSION_PARAMS = {
         | 
| 105 89 | 
             
                "random_state": DEFAULT_RANDOM_STATE,
         | 
| 106 90 | 
             
                "deterministic": True,
         | 
| @@ -127,9 +111,9 @@ LIGHTGBM_MULTICLASS_PARAMS = { | |
| 127 111 | 
             
                "max_cat_threshold": 80,
         | 
| 128 112 | 
             
                "min_data_per_group": 20,
         | 
| 129 113 | 
             
                "cat_smooth": 18,
         | 
| 130 | 
            -
                "cat_l2" | 
| 114 | 
            +
                "cat_l2": 8,
         | 
| 131 115 | 
             
                "objective": "multiclass",
         | 
| 132 | 
            -
                "class_weight": "balanced",
         | 
| 116 | 
            +
                # "class_weight": "balanced",
         | 
| 133 117 | 
             
                "use_quantized_grad": "true",
         | 
| 134 118 | 
             
                "num_grad_quant_bins": "8",
         | 
| 135 119 | 
             
                "stochastic_rounding": "true",
         | 
| @@ -143,12 +127,12 @@ LIGHTGBM_BINARY_PARAMS = { | |
| 143 127 | 
             
                "max_depth": 5,
         | 
| 144 128 | 
             
                "learning_rate": 0.05,
         | 
| 145 129 | 
             
                "objective": "binary",
         | 
| 146 | 
            -
                "class_weight": "balanced",
         | 
| 130 | 
            +
                # "class_weight": "balanced",
         | 
| 147 131 | 
             
                "deterministic": True,
         | 
| 148 132 | 
             
                "max_cat_threshold": 80,
         | 
| 149 133 | 
             
                "min_data_per_group": 20,
         | 
| 150 134 | 
             
                "cat_smooth": 18,
         | 
| 151 | 
            -
                "cat_l2" | 
| 135 | 
            +
                "cat_l2": 8,
         | 
| 152 136 | 
             
                "verbosity": -1,
         | 
| 153 137 | 
             
            }
         | 
| 154 138 |  | 
| @@ -507,7 +491,8 @@ class EstimatorWrapper: | |
| 507 491 | 
             
                            params = _get_add_params(params, add_params)
         | 
| 508 492 | 
             
                            estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
         | 
| 509 493 | 
             
                        elif target_type == ModelTaskType.REGRESSION:
         | 
| 510 | 
            -
                             | 
| 494 | 
            +
                            if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
         | 
| 495 | 
            +
                                params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
         | 
| 511 496 | 
             
                            params = _get_add_params(params, add_params)
         | 
| 512 497 | 
             
                            estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
         | 
| 513 498 | 
             
                        else:
         | 
| @@ -756,7 +741,6 @@ class LightGBMWrapper(EstimatorWrapper): | |
| 756 741 | 
             
                        logger=logger,
         | 
| 757 742 | 
             
                    )
         | 
| 758 743 | 
             
                    self.cat_features = None
         | 
| 759 | 
            -
                    # self.cat_features_encoders = dict()
         | 
| 760 744 | 
             
                    self.cat_encoder = None
         | 
| 761 745 | 
             
                    self.n_classes = None
         | 
| 762 746 |  | 
| @@ -768,23 +752,13 @@ class LightGBMWrapper(EstimatorWrapper): | |
| 768 752 | 
             
                        params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
         | 
| 769 753 | 
             
                    self.cat_features = _get_cat_features(x)
         | 
| 770 754 | 
             
                    if self.cat_features:
         | 
| 771 | 
            -
                        params["categorical_feature"] = self.cat_features
         | 
| 772 | 
            -
                        # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features] Works
         | 
| 773 | 
            -
                        # params["categorical_feature"] = "notauto"
         | 
| 774 | 
            -
                        # params["categorical_feature"] = "name:" + ",".join(self.cat_features)  # Doesn't work
         | 
| 775 | 
            -
                        # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features] Doesn't work
         | 
| 776 | 
            -
                        # params["categorical_feature"] = ",".join(cat_indices)
         | 
| 777 | 
            -
                        pass
         | 
| 778 755 | 
             
                        x = fill_na_cat_features(x, self.cat_features)
         | 
| 779 756 | 
             
                        encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
         | 
| 780 | 
            -
                        encoded =  | 
| 757 | 
            +
                        encoded = pd.DataFrame(
         | 
| 758 | 
            +
                            encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
         | 
| 759 | 
            +
                        )
         | 
| 781 760 | 
             
                        x[self.cat_features] = encoded
         | 
| 782 761 | 
             
                        self.cat_encoder = encoder
         | 
| 783 | 
            -
                    # for feature in self.cat_features:
         | 
| 784 | 
            -
                    #     encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
         | 
| 785 | 
            -
                    #     x[feature] = encoder.fit_transform(x[feature])
         | 
| 786 | 
            -
                    #     self.cat_features_encoders[feature] = encoder
         | 
| 787 | 
            -
                        # x[feature] = x[feature].astype("category").cat.codes
         | 
| 788 762 | 
             
                    if not is_numeric_dtype(y_numpy):
         | 
| 789 763 | 
             
                        y_numpy = correct_string_target(y_numpy)
         | 
| 790 764 |  | 
| @@ -793,19 +767,11 @@ class LightGBMWrapper(EstimatorWrapper): | |
| 793 767 | 
             
                def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
         | 
| 794 768 | 
             
                    x, y_numpy, params = super()._prepare_to_calculate(x, y)
         | 
| 795 769 | 
             
                    if self.cat_features is not None:
         | 
| 796 | 
            -
                        params["categorical_feature"] = self.cat_features
         | 
| 797 | 
            -
                        # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features]
         | 
| 798 | 
            -
                        # params["categorical_feature"] = "notauto"
         | 
| 799 | 
            -
                        # params["categorical_feature"] = "name:" + ",".join(self.cat_features)  # Doesn't work
         | 
| 800 | 
            -
                        # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features]
         | 
| 801 | 
            -
                        # params["categorical_feature"] = ",".join(cat_indices)
         | 
| 802 770 | 
             
                        x = fill_na_cat_features(x, self.cat_features)
         | 
| 803 771 | 
             
                        if self.cat_encoder is not None:
         | 
| 804 | 
            -
                            x[self.cat_features] =  | 
| 805 | 
            -
             | 
| 806 | 
            -
             | 
| 807 | 
            -
                        #     x[feature] = encoder.transform(x[feature])
         | 
| 808 | 
            -
                            # x[feature] = x[feature].astype("category").cat.codes
         | 
| 772 | 
            +
                            x[self.cat_features] = pd.DataFrame(
         | 
| 773 | 
            +
                                self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
         | 
| 774 | 
            +
                            )
         | 
| 809 775 | 
             
                    if not is_numeric_dtype(y):
         | 
| 810 776 | 
             
                        y_numpy = correct_string_target(y_numpy)
         | 
| 811 777 | 
             
                    return x, y_numpy, params
         | 
    
        upgini/utils/target_utils.py
    CHANGED
    
    | @@ -297,9 +297,9 @@ def balance_undersample_time_series_trunc( | |
| 297 297 | 
             
                time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
         | 
| 298 298 | 
             
                **kwargs,
         | 
| 299 299 | 
             
            ):
         | 
| 300 | 
            -
                # Convert date column to datetime
         | 
| 301 300 | 
             
                if id_columns is None:
         | 
| 302 | 
            -
                    id_columns = [ | 
| 301 | 
            +
                    id_columns = []
         | 
| 302 | 
            +
                # Convert date column to datetime
         | 
| 303 303 | 
             
                dates_df = df[id_columns + [date_column]].copy()
         | 
| 304 304 | 
             
                dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
         | 
| 305 305 |  | 
| @@ -1,12 +1,12 @@ | |
| 1 | 
            -
            upgini/__about__.py,sha256 | 
| 1 | 
            +
            upgini/__about__.py,sha256=-EK4ypqJTIRrg6g1P6PtLXT9vC4Vq7zblqFi389VgwA,23
         | 
| 2 2 | 
             
            upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
         | 
| 3 3 | 
             
            upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
         | 
| 4 4 | 
             
            upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
         | 
| 5 5 | 
             
            upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
         | 
| 6 | 
            -
            upgini/features_enricher.py,sha256= | 
| 6 | 
            +
            upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
         | 
| 7 7 | 
             
            upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
         | 
| 8 8 | 
             
            upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
         | 
| 9 | 
            -
            upgini/metrics.py,sha256= | 
| 9 | 
            +
            upgini/metrics.py,sha256=a0bY4oTMb-MgB1yC1IuTcEtotKZxAxjgV_QV2Z4V8u4,38988
         | 
| 10 10 | 
             
            upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
         | 
| 11 11 | 
             
            upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
         | 
| 12 12 | 
             
            upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
         | 
| @@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml | |
| 66 66 | 
             
            upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
         | 
| 67 67 | 
             
            upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
         | 
| 68 68 | 
             
            upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
         | 
| 69 | 
            -
            upgini/utils/target_utils.py,sha256= | 
| 69 | 
            +
            upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,16650
         | 
| 70 70 | 
             
            upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
         | 
| 71 71 | 
             
            upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
         | 
| 72 72 | 
             
            upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
         | 
| 73 | 
            -
            upgini-1.2. | 
| 74 | 
            -
            upgini-1.2. | 
| 75 | 
            -
            upgini-1.2. | 
| 76 | 
            -
            upgini-1.2. | 
| 73 | 
            +
            upgini-1.2.72.dist-info/METADATA,sha256=OpaT2gblO8qGzEJBNf36-dPwbedHPP93bX0fPAOMl38,49091
         | 
| 74 | 
            +
            upgini-1.2.72.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
         | 
| 75 | 
            +
            upgini-1.2.72.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
         | 
| 76 | 
            +
            upgini-1.2.72.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |