PyPI - upgini - Versions diffs - 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72__py3-none-any.whl - Mend

upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.71a3832.dev13"
1	+ __version__ = "1.2.72"

upgini/features_enricher.py CHANGED Viewed

@@ -3250,8 +3250,7 @@ if response.status_code == 200:
     def _validate_eval_set_pair(self, X: pd.DataFrame, eval_pair: Tuple) -> Tuple[pd.DataFrame, pd.Series]:
         if len(eval_pair) != 2:
             raise ValidationError(self.bundle.get("eval_set_invalid_tuple_size").format(len(eval_pair)))
-        eval_X = eval_pair[0]
-        eval_y = eval_pair[1]
+        eval_X, eval_y = eval_pair
         if _num_samples(eval_X) == 0:
             raise ValidationError(self.bundle.get("eval_x_is_empty"))

upgini/metrics.py CHANGED Viewed

@@ -8,18 +8,18 @@ from copy import deepcopy
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import lightgbm as lgb
 import numpy as np
 import pandas as pd
 from lightgbm import LGBMClassifier, LGBMRegressor
-import lightgbm as lgb
 from numpy import log1p
 from pandas.api.types import is_numeric_dtype
-# from sklearn.calibration import LabelEncoder
 from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
 from sklearn.preprocessing import OrdinalEncoder
 from upgini.utils.features_validator import FeaturesValidator
 from upgini.utils.sklearn_ext import cross_validate
+from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
 try:
     from sklearn.metrics import get_scorer_names
@@ -31,7 +31,7 @@ except ImportError:
     available_scorers = SCORERS
 from sklearn.metrics import mean_squared_error
 from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
-from sklearn.model_selection import BaseCrossValidator
+from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
 from upgini.errors import ValidationError
 from upgini.metadata import ModelTaskType
@@ -85,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = {
     "auto_class_weights": "Balanced",
 }
-LIGHTGBM_PARAMS = {
-    "random_state": DEFAULT_RANDOM_STATE,
-    # "num_leaves": 16,
-    # "n_estimators": 150,
-    # "min_child_weight": 1,
-    "max_depth": 4,
-    "max_cat_threshold": 80,
-    "min_data_per_group": 25,
-    "num_boost_round": 150,
-    "cat_l2": 10,
-    "cat_smooth": 12,
-    "learning_rate": 0.05,
-    "feature_fraction": 1.0,
-    "min_sum_hessian_in_leaf": 0.01,
-}
 LIGHTGBM_REGRESSION_PARAMS = {
     "random_state": DEFAULT_RANDOM_STATE,
     "deterministic": True,
@@ -127,9 +111,9 @@ LIGHTGBM_MULTICLASS_PARAMS = {
     "max_cat_threshold": 80,
     "min_data_per_group": 20,
     "cat_smooth": 18,
-    "cat_l2" : 8,
+    "cat_l2": 8,
     "objective": "multiclass",
-    "class_weight": "balanced",
+    # "class_weight": "balanced",
     "use_quantized_grad": "true",
     "num_grad_quant_bins": "8",
     "stochastic_rounding": "true",
@@ -143,12 +127,12 @@ LIGHTGBM_BINARY_PARAMS = {
     "max_depth": 5,
     "learning_rate": 0.05,
     "objective": "binary",
-    "class_weight": "balanced",
+    # "class_weight": "balanced",
     "deterministic": True,
     "max_cat_threshold": 80,
     "min_data_per_group": 20,
     "cat_smooth": 18,
-    "cat_l2" : 8,
+    "cat_l2": 8,
     "verbosity": -1,
 }
@@ -507,7 +491,8 @@ class EstimatorWrapper:
                 params = _get_add_params(params, add_params)
                 estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
             elif target_type == ModelTaskType.REGRESSION:
-                params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
+                if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
+                    params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
                 params = _get_add_params(params, add_params)
                 estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
             else:
@@ -756,7 +741,6 @@ class LightGBMWrapper(EstimatorWrapper):
             logger=logger,
         )
         self.cat_features = None
-        # self.cat_features_encoders = dict()
         self.cat_encoder = None
         self.n_classes = None
@@ -768,23 +752,13 @@ class LightGBMWrapper(EstimatorWrapper):
             params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
         self.cat_features = _get_cat_features(x)
         if self.cat_features:
-            params["categorical_feature"] = self.cat_features
-            # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features] Works
-            # params["categorical_feature"] = "notauto"
-            # params["categorical_feature"] = "name:" + ",".join(self.cat_features)  # Doesn't work
-            # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features] Doesn't work
-            # params["categorical_feature"] = ",".join(cat_indices)
-            pass
             x = fill_na_cat_features(x, self.cat_features)
             encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
-            encoded = encoder.fit_transform(x[self.cat_features], y_numpy)
+            encoded = pd.DataFrame(
+                encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
+            )
             x[self.cat_features] = encoded
             self.cat_encoder = encoder
-        # for feature in self.cat_features:
-        #     encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
-        #     x[feature] = encoder.fit_transform(x[feature])
-        #     self.cat_features_encoders[feature] = encoder
-            # x[feature] = x[feature].astype("category").cat.codes
         if not is_numeric_dtype(y_numpy):
             y_numpy = correct_string_target(y_numpy)
@@ -793,19 +767,11 @@ class LightGBMWrapper(EstimatorWrapper):
     def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
         x, y_numpy, params = super()._prepare_to_calculate(x, y)
         if self.cat_features is not None:
-            params["categorical_feature"] = self.cat_features
-            # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features]
-            # params["categorical_feature"] = "notauto"
-            # params["categorical_feature"] = "name:" + ",".join(self.cat_features)  # Doesn't work
-            # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features]
-            # params["categorical_feature"] = ",".join(cat_indices)
             x = fill_na_cat_features(x, self.cat_features)
             if self.cat_encoder is not None:
-                x[self.cat_features] = self.cat_encoder.transform(x[self.cat_features])
-            # for feature in self.cat_features:
-            #     encoder = self.cat_features_encoders[feature]
-            #     x[feature] = encoder.transform(x[feature])
-                # x[feature] = x[feature].astype("category").cat.codes
+                x[self.cat_features] = pd.DataFrame(
+                    self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
+                )
         if not is_numeric_dtype(y):
             y_numpy = correct_string_target(y_numpy)
         return x, y_numpy, params

upgini/utils/target_utils.py CHANGED Viewed

@@ -297,9 +297,9 @@ def balance_undersample_time_series_trunc(
     time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
     **kwargs,
 ):
-    # Convert date column to datetime
     if id_columns is None:
-        id_columns = [date_column]
+        id_columns = []
+    # Convert date column to datetime
     dates_df = df[id_columns + [date_column]].copy()
     dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")

{upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.71a3832.dev13
+Version: 1.2.72
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=buorll9F2OX4EgV8VmlIrj09nqmsSmqAG8T8p6hRCls,34
+upgini/__about__.py,sha256=-EK4ypqJTIRrg6g1P6PtLXT9vC4Vq7zblqFi389VgwA,23
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=lk80Bx9U36lva6T4lPHBFk88ivrpZ-2uwwMwQg0LglE,207023
+upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
 upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
 upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
-upgini/metrics.py,sha256=ot6AhxfRRTzM-dNApWTvmteLBAmGjD9OyAuKmtUTprE,40630
+upgini/metrics.py,sha256=a0bY4oTMb-MgB1yC1IuTcEtotKZxAxjgV_QV2Z4V8u4,38988
 upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
 upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
 upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
 upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
-upgini/utils/target_utils.py,sha256=KNFzJta1SpGU4sp07dHKSeVJlDs_9qgD2wcw5YuJfOc,16661
+upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,16650
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.71a3832.dev13.dist-info/METADATA,sha256=JdRugxJAMW4KLyRuz7yIX_PqSz_nObynmhkW5-g_lVs,49102
-upgini-1.2.71a3832.dev13.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.71a3832.dev13.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.71a3832.dev13.dist-info/RECORD,,
+upgini-1.2.72.dist-info/METADATA,sha256=OpaT2gblO8qGzEJBNf36-dPwbedHPP93bX0fPAOMl38,49091
+upgini-1.2.72.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.72.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.72.dist-info/RECORD,,

{upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72__py3-none-any.whl

upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72__py3-none-any.whl