PyPI - upgini - Versions diffs - 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl - Mend

upgini 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

upgini/__about__.py +1 -1
upgini/features_enricher.py +6 -5
upgini/metrics.py +42 -42
{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/METADATA +1 -1
{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/RECORD +7 -7
{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/WHEEL +0 -0
{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/licenses/LICENSE +0 -0

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.79"
1	+ __version__ = "1.2.80"

upgini/features_enricher.py CHANGED Viewed

@@ -1119,7 +1119,7 @@ class FeaturesEnricher(TransformerMixin):
                         self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
                     }
                     if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
-                        validated_y
+                        y_sorted
                     ):
                         train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
                             # np.mean(validated_y), 4
@@ -1197,7 +1197,7 @@ class FeaturesEnricher(TransformerMixin):
                                 # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
                             }
                             if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
-                                validated_eval_set[idx][1]
+                                eval_y_sorted
                             ):
                                 eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
                                     # np.mean(validated_eval_set[idx][1]), 4
@@ -3886,9 +3886,10 @@ if response.status_code == 200:
             if updated_shaps is not None:
                 updating_shap = updated_shaps.get(feature_meta.name)
                 if updating_shap is None:
-                    self.logger.warning(
-                        f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
-                    )
+                    if feature_meta.shap_value != 0.0:
+                        self.logger.warning(
+                            f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
+                        )
                     updating_shap = 0.0
                 feature_meta.shap_value = updating_shap

upgini/metrics.py CHANGED Viewed

@@ -99,7 +99,7 @@ LIGHTGBM_REGRESSION_PARAMS = {
     "min_sum_hessian_in_leaf": 0.01,
     "objective": "huber",
     "deterministic": "true",
-    "force_col_wise": "true",
+    # "force_col_wise": "true",
     "verbosity": -1,
 }
@@ -119,7 +119,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
     "num_grad_quant_bins": "8",
     "stochastic_rounding": "true",
     "deterministic": "true",
-    "force_col_wise": "true",
+    # "force_col_wise": "true",
     "verbosity": -1,
 }
@@ -136,7 +136,7 @@ LIGHTGBM_BINARY_PARAMS = {
     "cat_smooth": 18,
     "cat_l2": 8,
     "deterministic": "true",
-    "force_col_wise": "true",
+    # "force_col_wise": "true",
     "verbosity": -1,
 }
@@ -145,33 +145,33 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
 N_FOLDS = 5
 BLOCKED_TS_TEST_SIZE = 0.2
-NA_VALUES = [
-    "",
-    " ",
-    "   ",
-    "#n/a",
-    "#n/a n/a",
-    "#na",
-    "-1.#ind",
-    "-1.#qnan",
-    "-nan",
-    "1.#ind",
-    "1.#qnan",
-    "n/a",
-    "na",
-    "null",
-    "nan",
-    "n/a",
-    "nan",
-    "none",
-    "-",
-    "undefined",
-    "[[unknown]]",
-    "[not provided]",
-    "[unknown]",
-]
-NA_REPLACEMENT = "NA"
+# NA_VALUES = [
+#     "",
+#     " ",
+#     "   ",
+#     "#n/a",
+#     "#n/a n/a",
+#     "#na",
+#     "-1.#ind",
+#     "-1.#qnan",
+#     "-nan",
+#     "1.#ind",
+#     "1.#qnan",
+#     "n/a",
+#     "na",
+#     "null",
+#     "nan",
+#     "n/a",
+#     "nan",
+#     "none",
+#     "-",
+#     "undefined",
+#     "[[unknown]]",
+#     "[not provided]",
+#     "[unknown]",
+# ]
+# NA_REPLACEMENT = "NA"
 SUPPORTED_CATBOOST_METRICS = {
     s.upper(): s
@@ -758,8 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
             params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
         self.cat_features = _get_cat_features(x)
         if self.cat_features:
-            x = fill_na_cat_features(x, self.cat_features)
-            encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
+            # x = fill_na_cat_features(x, self.cat_features)
+            encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
             encoded = pd.DataFrame(
                 encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
             )
@@ -773,7 +773,7 @@ class LightGBMWrapper(EstimatorWrapper):
     def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
         x, y_numpy, params = super()._prepare_to_calculate(x, y)
         if self.cat_features is not None:
-            x = fill_na_cat_features(x, self.cat_features)
+            # x = fill_na_cat_features(x, self.cat_features)
             if self.cat_encoder is not None:
                 x[self.cat_features] = pd.DataFrame(
                     self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
@@ -857,7 +857,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
         self.cat_features = _get_cat_features(x)
         num_features = [col for col in x.columns if col not in self.cat_features]
         x[num_features] = x[num_features].fillna(-999)
-        x = fill_na_cat_features(x, self.cat_features)
+        # x = fill_na_cat_features(x, self.cat_features)
         # TODO use one-hot encoding if cardinality is less 50
         for feature in self.cat_features:
             x[feature] = x[feature].astype("category").cat.codes
@@ -870,7 +870,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
         if self.cat_features is not None:
             num_features = [col for col in x.columns if col not in self.cat_features]
             x[num_features] = x[num_features].fillna(-999)
-            x = fill_na_cat_features(x, self.cat_features)
+            # x = fill_na_cat_features(x, self.cat_features)
             # TODO use one-hot encoding if cardinality is less 50
             for feature in self.cat_features:
                 x[feature] = x[feature].astype("category").cat.codes
@@ -1056,10 +1056,10 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
     return mse if squared else np.sqrt(mse)
-def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
-    for c in cat_features:
-        if c in df.columns:
-            df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
-            na_filter = df[c].str.lower().isin(NA_VALUES)
-            df.loc[na_filter, c] = NA_REPLACEMENT
-    return df
+# def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
+#     for c in cat_features:
+#         if c in df.columns:
+#             df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
+#             na_filter = df[c].str.lower().isin(NA_VALUES)
+#             df.loc[na_filter, c] = NA_REPLACEMENT
+#     return df

{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.79
+Version: 1.2.80
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
-upgini/__about__.py,sha256=mupwAhPLfGDd9OAn7f6J2lwQapeaIysxn41khUOG57I,23
+upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=_UkJS35uGaYtI7dR6Xd9Q28nmiPzTjhK3y8v3IjJTfQ,208245
+upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
 upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
 upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
-upgini/metrics.py,sha256=_Ue1nymBMVmoCdPMGGXe0FFkvNxNg592FmX2WJWKDFE,39294
+upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
 upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
 upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
 upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.79.dist-info/METADATA,sha256=zxQyk76bWj5AGiXERdfEQVcqsZXD4RU5gaRQGk1LpeM,49091
-upgini-1.2.79.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.79.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.79.dist-info/RECORD,,
+upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
+upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.80.dist-info/RECORD,,

{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl

upgini 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl