PyPI - upgini - Versions diffs - 1.2.68a3832.dev4__py3-none-any.whl → 1.2.68a3832.dev6__py3-none-any.whl - Mend

upgini 1.2.68a3832.dev4__py3-none-any.whl → 1.2.68a3832.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (7) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.68a3832.dev4"
1	+ __version__ = "1.2.68a3832.dev6"

upgini/dataset.py CHANGED Viewed

@@ -388,7 +388,7 @@ class Dataset:  # (pd.DataFrame):
         for col in columns_to_validate:
             self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
             if validate_target and target is not None and col == target:
-                self.data.loc[self.data[target] == np.Inf, f"{col}_is_valid"] = False
+                self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
             if col in mandatory_columns:
                 self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]

upgini/metrics.py CHANGED Viewed

@@ -101,6 +101,52 @@ LIGHTGBM_PARAMS = {
     "min_sum_hessian_in_leaf": 0.01,
 }
+LIGHTGBM_REGRESSION_PARAMS = {
+  "random_state": DEFAULT_RANDOM_STATE,
+  "n_estimators": 275,
+  "max_depth": 5,
+  "max_cat_threshold": 80,
+  "min_data_per_group": 25,
+  "cat_l2": 10,
+  "cat_smooth": 12,
+  "learning_rate": 0.05,
+  "feature_fraction": 1.0,
+  "min_sum_hessian_in_leaf": 0.01,
+  "objective": "huber",
+  "verbosity": 0,
+}
+LIGHTGBM_MULTICLASS_PARAMS = {
+    "random_state": DEFAULT_RANDOM_STATE,
+    "n_estimators": 275,
+    "max_depth": 3,
+    "max_cat_threshold": 80,
+    "min_data_per_group": 25,
+    "cat_l2": 10,
+    "cat_smooth": 12,
+    "learning_rate": 0.25,              # CatBoost 0.25
+    "min_sum_hessian_in_leaf": 0.01,
+    "objective": "multiclass",
+    "class_weight": "balanced",
+    "verbosity": 0,
+}
+LIGHTGBM_BINARY_PARAMS = {
+    "random_state": DEFAULT_RANDOM_STATE,
+    "n_estimators": 275,
+    "max_depth": 5,
+    "max_cat_threshold": 80,
+    "min_data_per_group": 25,
+    "cat_l2": 10,
+    "cat_smooth": 12,
+    "learning_rate": 0.05,
+    "feature_fraction": 1.0,
+    "min_sum_hessian_in_leaf": 0.01,
+    "objective": "binary",
+    "class_weight": "balanced",
+    "verbosity": 0,
+}
 N_FOLDS = 5
 BLOCKED_TS_TEST_SIZE = 0.2
@@ -441,28 +487,27 @@ class EstimatorWrapper:
         }
         if estimator is None:
             params = {}
-            params["has_time"] = has_date
             # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
             #     params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
             if target_type == ModelTaskType.MULTICLASS:
                 # params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
                 # params = _get_add_params(params, add_params)
                 # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
-                params = _get_add_params(params, LIGHTGBM_PARAMS)
+                params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
                 params = _get_add_params(params, add_params)
                 estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
             elif target_type == ModelTaskType.BINARY:
                 # params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
                 # params = _get_add_params(params, add_params)
                 # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
-                params = _get_add_params(params, LIGHTGBM_PARAMS)
+                params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
                 params = _get_add_params(params, add_params)
                 estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
             elif target_type == ModelTaskType.REGRESSION:
                 # params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
                 # params = _get_add_params(params, add_params)
                 # estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
-                params = _get_add_params(params, LIGHTGBM_PARAMS)
+                params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
                 params = _get_add_params(params, add_params)
                 estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
             else:
@@ -474,6 +519,7 @@ class EstimatorWrapper:
                 estimator_copy = deepcopy(estimator)
             kwargs["estimator"] = estimator_copy
             if is_catboost_estimator(estimator):
+                params["has_time"] = has_date
                 if cat_features is not None:
                     for cat_feature in cat_features:
                         if cat_feature not in x.columns:
@@ -712,6 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
     def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
         x, y, groups, params = super()._prepare_to_fit(x, y)
+        if self.target_type == ModelTaskType.MULTICLASS:
+            params["num_class"] = y.nunique()
         self.cat_features = _get_cat_features(x)
         x = fill_na_cat_features(x, self.cat_features)
         for feature in self.cat_features:

{upgini-1.2.68a3832.dev4.dist-info → upgini-1.2.68a3832.dev6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.68a3832.dev4
+Version: 1.2.68a3832.dev6
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.68a3832.dev4.dist-info → upgini-1.2.68a3832.dev6.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
-upgini/__about__.py,sha256=tA-8e1UEPt8tajFxx1rKXEmQOOfuqGfXLGYhIFUctPM,33
+upgini/__about__.py,sha256=8CoP2d6NQy3RuFamWKCHcwiF2GYkyj5rtk6FIpBm0rI,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
-upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
+upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
 upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,205476
 upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
 upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
 upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
-upgini/metrics.py,sha256=0WIe1IQx9vzUK0pVGv3hODBrOL3zaLDybXbs5S_ntvQ,36991
+upgini/metrics.py,sha256=ZBAjInLCm15BBYWNi9kz6IJs8R0WrF2PkrLnLAodR1Y,38246
 upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.68a3832.dev4.dist-info/METADATA,sha256=N1U1IshnQeHVgYDyt_pa2G7SezelEjMUjcdNOQ1KxkQ,49149
-upgini-1.2.68a3832.dev4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.68a3832.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.68a3832.dev4.dist-info/RECORD,,
+upgini-1.2.68a3832.dev6.dist-info/METADATA,sha256=UWgAnrn5D5mQT6Js-sXVBCA1wPW7YZU-JEEcnRdUCHU,49149
+upgini-1.2.68a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.68a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.68a3832.dev6.dist-info/RECORD,,

{upgini-1.2.68a3832.dev4.dist-info → upgini-1.2.68a3832.dev6.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.68a3832.dev4.dist-info → upgini-1.2.68a3832.dev6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.68a3832.dev4__py3-none-any.whl → 1.2.68a3832.dev6__py3-none-any.whl

Potentially problematic release.

upgini 1.2.68a3832.dev4__py3-none-any.whl → 1.2.68a3832.dev6__py3-none-any.whl