upgini 1.2.72a3659.dev1__py3-none-any.whl → 1.2.73a3659.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/vector.py +3 -3
- upgini/metrics.py +6 -20
- upgini/utils/target_utils.py +2 -2
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73a3659.dev2.dist-info}/METADATA +1 -1
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73a3659.dev2.dist-info}/RECORD +8 -8
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73a3659.dev2.dist-info}/WHEEL +0 -0
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73a3659.dev2.dist-info}/licenses/LICENSE +0 -0
    
        upgini/__about__.py
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            __version__ = "1.2.72a3659.dev1"
         | 
| 1 | 
            +
            __version__ = "1.2.73a3659.dev2"
         | 
    
        upgini/autofe/vector.py
    CHANGED
    
    | @@ -2,7 +2,7 @@ from typing import Dict, List, Optional | |
| 2 2 |  | 
| 3 3 | 
             
            import pandas as pd
         | 
| 4 4 |  | 
| 5 | 
            -
            from upgini.autofe.operator import PandasOperator, VectorizableMixin
         | 
| 5 | 
            +
            from upgini.autofe.operator import OperatorRegistry, PandasOperator, VectorizableMixin
         | 
| 6 6 |  | 
| 7 7 |  | 
| 8 8 | 
             
            class Mean(PandasOperator, VectorizableMixin):
         | 
| @@ -24,11 +24,11 @@ class Sum(PandasOperator, VectorizableMixin): | |
| 24 24 | 
             
                    return pd.DataFrame(data).T.fillna(0).sum(axis=1)
         | 
| 25 25 |  | 
| 26 26 |  | 
| 27 | 
            -
            class OnnxModel(PandasOperator):
         | 
| 27 | 
            +
            class OnnxModel(PandasOperator, metaclass=OperatorRegistry):
         | 
| 28 28 | 
             
                name: str = "onnx"
         | 
| 29 29 | 
             
                is_vector: bool = True
         | 
| 30 30 | 
             
                output_type: Optional[str] = "float"
         | 
| 31 | 
            -
                model_name: str
         | 
| 31 | 
            +
                model_name: str = ""
         | 
| 32 32 |  | 
| 33 33 | 
             
                def get_params(self) -> Dict[str, Optional[str]]:
         | 
| 34 34 | 
             
                    res = super().get_params()
         | 
    
        upgini/metrics.py
    CHANGED
    
    | @@ -19,6 +19,7 @@ from sklearn.preprocessing import OrdinalEncoder | |
| 19 19 |  | 
| 20 20 | 
             
            from upgini.utils.features_validator import FeaturesValidator
         | 
| 21 21 | 
             
            from upgini.utils.sklearn_ext import cross_validate
         | 
| 22 | 
            +
            from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
         | 
| 22 23 |  | 
| 23 24 | 
             
            try:
         | 
| 24 25 | 
             
                from sklearn.metrics import get_scorer_names
         | 
| @@ -30,7 +31,7 @@ except ImportError: | |
| 30 31 | 
             
                available_scorers = SCORERS
         | 
| 31 32 | 
             
            from sklearn.metrics import mean_squared_error
         | 
| 32 33 | 
             
            from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
         | 
| 33 | 
            -
            from sklearn.model_selection import BaseCrossValidator
         | 
| 34 | 
            +
            from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
         | 
| 34 35 |  | 
| 35 36 | 
             
            from upgini.errors import ValidationError
         | 
| 36 37 | 
             
            from upgini.metadata import ModelTaskType
         | 
| @@ -84,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = { | |
| 84 85 | 
             
                "auto_class_weights": "Balanced",
         | 
| 85 86 | 
             
            }
         | 
| 86 87 |  | 
| 87 | 
            -
            LIGHTGBM_PARAMS = {
         | 
| 88 | 
            -
                "random_state": DEFAULT_RANDOM_STATE,
         | 
| 89 | 
            -
                # "num_leaves": 16,
         | 
| 90 | 
            -
                # "n_estimators": 150,
         | 
| 91 | 
            -
                # "min_child_weight": 1,
         | 
| 92 | 
            -
                "max_depth": 4,
         | 
| 93 | 
            -
                "max_cat_threshold": 80,
         | 
| 94 | 
            -
                "min_data_per_group": 25,
         | 
| 95 | 
            -
                "num_boost_round": 150,
         | 
| 96 | 
            -
                "cat_l2": 10,
         | 
| 97 | 
            -
                "cat_smooth": 12,
         | 
| 98 | 
            -
                "learning_rate": 0.05,
         | 
| 99 | 
            -
                "feature_fraction": 1.0,
         | 
| 100 | 
            -
                "min_sum_hessian_in_leaf": 0.01,
         | 
| 101 | 
            -
            }
         | 
| 102 | 
            -
             | 
| 103 88 | 
             
            LIGHTGBM_REGRESSION_PARAMS = {
         | 
| 104 89 | 
             
                "random_state": DEFAULT_RANDOM_STATE,
         | 
| 105 90 | 
             
                "deterministic": True,
         | 
| @@ -128,7 +113,7 @@ LIGHTGBM_MULTICLASS_PARAMS = { | |
| 128 113 | 
             
                "cat_smooth": 18,
         | 
| 129 114 | 
             
                "cat_l2": 8,
         | 
| 130 115 | 
             
                "objective": "multiclass",
         | 
| 131 | 
            -
                "class_weight": "balanced",
         | 
| 116 | 
            +
                # "class_weight": "balanced",
         | 
| 132 117 | 
             
                "use_quantized_grad": "true",
         | 
| 133 118 | 
             
                "num_grad_quant_bins": "8",
         | 
| 134 119 | 
             
                "stochastic_rounding": "true",
         | 
| @@ -142,7 +127,7 @@ LIGHTGBM_BINARY_PARAMS = { | |
| 142 127 | 
             
                "max_depth": 5,
         | 
| 143 128 | 
             
                "learning_rate": 0.05,
         | 
| 144 129 | 
             
                "objective": "binary",
         | 
| 145 | 
            -
                "class_weight": "balanced",
         | 
| 130 | 
            +
                # "class_weight": "balanced",
         | 
| 146 131 | 
             
                "deterministic": True,
         | 
| 147 132 | 
             
                "max_cat_threshold": 80,
         | 
| 148 133 | 
             
                "min_data_per_group": 20,
         | 
| @@ -506,7 +491,8 @@ class EstimatorWrapper: | |
| 506 491 | 
             
                            params = _get_add_params(params, add_params)
         | 
| 507 492 | 
             
                            estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
         | 
| 508 493 | 
             
                        elif target_type == ModelTaskType.REGRESSION:
         | 
| 509 | 
            -
                             | 
| 494 | 
            +
                            if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
         | 
| 495 | 
            +
                                params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
         | 
| 510 496 | 
             
                            params = _get_add_params(params, add_params)
         | 
| 511 497 | 
             
                            estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
         | 
| 512 498 | 
             
                        else:
         | 
    
        upgini/utils/target_utils.py
    CHANGED
    
    | @@ -297,9 +297,9 @@ def balance_undersample_time_series_trunc( | |
| 297 297 | 
             
                time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
         | 
| 298 298 | 
             
                **kwargs,
         | 
| 299 299 | 
             
            ):
         | 
| 300 | 
            -
                # Convert date column to datetime
         | 
| 301 300 | 
             
                if id_columns is None:
         | 
| 302 | 
            -
                    id_columns = [ | 
| 301 | 
            +
                    id_columns = []
         | 
| 302 | 
            +
                # Convert date column to datetime
         | 
| 303 303 | 
             
                dates_df = df[id_columns + [date_column]].copy()
         | 
| 304 304 | 
             
                dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
         | 
| 305 305 |  | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Metadata-Version: 2.3
         | 
| 2 2 | 
             
            Name: upgini
         | 
| 3 | 
            -
            Version: 1.2.72a3659.dev1
         | 
| 3 | 
            +
            Version: 1.2.73a3659.dev2
         | 
| 4 4 | 
             
            Summary: Intelligent data search & enrichment for Machine Learning
         | 
| 5 5 | 
             
            Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
         | 
| 6 6 | 
             
            Project-URL: Homepage, https://upgini.com/
         | 
| @@ -1,4 +1,4 @@ | |
| 1 | 
            -
            upgini/__about__.py,sha256= | 
| 1 | 
            +
            upgini/__about__.py,sha256=Vn3aojC64D6rn5ZFKIFRFVE3tY8D8CLC3Y0V5pbn2Jo,33
         | 
| 2 2 | 
             
            upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
         | 
| 3 3 | 
             
            upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
         | 
| 4 4 | 
             
            upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
         | 
| @@ -6,7 +6,7 @@ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950 | |
| 6 6 | 
             
            upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
         | 
| 7 7 | 
             
            upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
         | 
| 8 8 | 
             
            upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
         | 
| 9 | 
            -
            upgini/metrics.py,sha256= | 
| 9 | 
            +
            upgini/metrics.py,sha256=a0bY4oTMb-MgB1yC1IuTcEtotKZxAxjgV_QV2Z4V8u4,38988
         | 
| 10 10 | 
             
            upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
         | 
| 11 11 | 
             
            upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
         | 
| 12 12 | 
             
            upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
         | 
| @@ -21,7 +21,7 @@ upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603 | |
| 21 21 | 
             
            upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
         | 
| 22 22 | 
             
            upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
         | 
| 23 23 | 
             
            upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
         | 
| 24 | 
            -
            upgini/autofe/vector.py,sha256 | 
| 24 | 
            +
            upgini/autofe/vector.py,sha256=w7ipoFRvR0BcTYcvJR9EbKc_ycIn9cJ94RLgrgIi4Uc,1212
         | 
| 25 25 | 
             
            upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
         | 
| 26 26 | 
             
            upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
         | 
| 27 27 | 
             
            upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
         | 
| @@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml | |
| 66 66 | 
             
            upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
         | 
| 67 67 | 
             
            upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
         | 
| 68 68 | 
             
            upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
         | 
| 69 | 
            -
            upgini/utils/target_utils.py,sha256= | 
| 69 | 
            +
            upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,16650
         | 
| 70 70 | 
             
            upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
         | 
| 71 71 | 
             
            upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
         | 
| 72 72 | 
             
            upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
         | 
| 73 | 
            -
            upgini-1.2. | 
| 74 | 
            -
            upgini-1.2.72a3659.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
         | 
| 75 | 
            -
            upgini-1.2.72a3659.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
         | 
| 76 | 
            -
            upgini-1.2.72a3659.dev1.dist-info/RECORD,,
         | 
| 73 | 
            +
            upgini-1.2.73a3659.dev2.dist-info/METADATA,sha256=WImhNzA5wn2I_HyEYKvKAcUfpIWbQ0spUAI7tgu-fiQ,49101
         | 
| 74 | 
            +
            upgini-1.2.73a3659.dev2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
         | 
| 75 | 
            +
            upgini-1.2.73a3659.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
         | 
| 76 | 
            +
            upgini-1.2.73a3659.dev2.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |