PyPI - upgini - Versions diffs - 1.2.25a1__tar.gz → 1.2.27__tar.gz - Mend

upgini 1.2.25a1.tar.gz → 1.2.27.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (67) hide show

{upgini-1.2.25a1 → upgini-1.2.27}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.25a1
+Version: 1.2.27
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.25a1 → upgini-1.2.27}/pyproject.toml RENAMED Viewed

@@ -52,12 +52,6 @@ dependencies = [
     "levenshtein>=0.25.1",
 ]
-[tool.setuptools]
-include-package-data = true
-[tool.setuptools.package-data]
-"upgini" = ["utils/Roboto-Regular.ttf"]
 [project.urls]
 "Bug Reports" = "https://github.com/upgini/upgini/issues"
 Homepage = "https://upgini.com/"

upgini-1.2.27/src/upgini/__about__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "1.2.27"

upgini-1.2.27/src/upgini/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from upgini.features_enricher import FeaturesEnricher  # noqa: F401
+from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType  # noqa: F401
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning, module="_distutils_hack")

{upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/features_enricher.py RENAMED Viewed

@@ -2546,9 +2546,11 @@ class FeaturesEnricher(TransformerMixin):
             self.fit_generated_features.extend(generator.generated_features)
         # Checks that need validated date
-        if not is_dates_distribution_valid(df, self.fit_search_keys):
-            self.__log_warning(bundle.get("x_unstable_by_date"))
+        try:
+            if not is_dates_distribution_valid(df, self.fit_search_keys):
+                self.__log_warning(bundle.get("x_unstable_by_date"))
+        except Exception:
+            self.logger.exception("Failed to check dates distribution validity")
         if (
             is_numeric_dtype(df[self.TARGET_NAME])
@@ -3194,9 +3196,8 @@ class FeaturesEnricher(TransformerMixin):
         return df
-    @staticmethod
     def _add_current_date_as_key(
-        df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
+        self, df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
     ) -> pd.DataFrame:
         if (
             set(search_keys.values()) == {SearchKey.PHONE}
@@ -3204,9 +3205,7 @@ class FeaturesEnricher(TransformerMixin):
             or set(search_keys.values()) == {SearchKey.HEM}
             or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
         ):
-            msg = bundle.get("current_date_added")
-            print(msg)
-            logger.warning(msg)
+            self.__log_warning(bundle.get("current_date_added"))
             df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
             search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
             converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE)
@@ -4041,15 +4040,19 @@ class FeaturesEnricher(TransformerMixin):
         half_train = round(len(train) / 2)
         part1 = train[:half_train]
         part2 = train[half_train:]
-        train_psi = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
-        if train_psi > 0.2:
-            self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi))
+        train_psi_result = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
+        if isinstance(train_psi_result, Exception):
+            self.logger.exception("Failed to calculate train PSI", train_psi_result)
+        elif train_psi_result > 0.2:
+            self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi_result))
         # 2. Check train-test PSI
         if eval1 is not None:
-            train_test_psi = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
-            if train_test_psi > 0.2:
-                self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi))
+            train_test_psi_result = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
+            if isinstance(train_test_psi_result, Exception):
+                self.logger.exception("Failed to calculate test PSI", train_test_psi_result)
+            elif train_test_psi_result > 0.2:
+                self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi_result))
     def _dump_python_libs(self):
         try:

{upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/resource_bundle/strings.properties RENAMED Viewed

@@ -9,7 +9,7 @@ search_stopped=Search request stopped
 polling_search_task=\nRunning search request, search_id={}
 polling_unregister_information=We'll send email notification once it's completed, just use your personal api_key from profile.upgini.com
 ads_upload_finish=Thank you for your submission!\nWe'll check your data sharing proposal and get back to you
-demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.
+demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.\n
 transform_usage_info=You use Trial access to Upgini data enrichment. Limit for Trial: {} rows. You have already enriched: {} rows.
 transform_usage_warning=You are trying to launch enrichment for {} rows, which will exceed the rest limit {}.

{upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/display_utils.py RENAMED Viewed

@@ -169,7 +169,6 @@ def make_html_report(
         from pkg_resources import resource_filename
         font_path = resource_filename('upgini.utils', 'Roboto-Regular.ttf')
-    print(font_path)
     return f"""<html>
         <head>
             <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />

{upgini-1.2.25a1 → upgini-1.2.27}/src/upgini/utils/target_utils.py RENAMED Viewed

@@ -3,7 +3,7 @@ from typing import Optional, Union
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_numeric_dtype
+from pandas.api.types import is_numeric_dtype, is_bool_dtype
 from upgini.errors import ValidationError
 from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
@@ -229,22 +229,25 @@ def balance_undersample(
     return resampled_data
-def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
-    df = pd.concat([expected, actual])
+def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Exception]:
+    try:
+        df = pd.concat([expected, actual])
-    # Define the bins for the target variable
-    df_min = df.min()
-    df_max = df.max()
-    bins = [df_min, (df_min + df_max) / 2, df_max]
+        if is_bool_dtype(df):
+            df = np.where(df, 1, 0)
-    # Calculate the base distribution
-    train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
+        # Define the bins for the target variable
+        df_min = df.min()
+        df_max = df.max()
+        bins = [df_min, (df_min + df_max) / 2, df_max]
-    # Calculate the target distribution
-    test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
+        # Calculate the base distribution
+        train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
-    # Calculate the PSI
-    try:
+        # Calculate the target distribution
+        test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
+        # Calculate the PSI
         return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
-    except Exception:
-        return np.nan
+    except Exception as e:
+        return e

upgini-1.2.25a1/src/upgini/__about__.py DELETED Viewed

@@ -1 +0,0 @@
-__version__ = "1.2.25a1"

upgini-1.2.25a1/src/upgini/__init__.py DELETED Viewed

@@ -1,13 +0,0 @@
-import os
-from upgini.features_enricher import FeaturesEnricher  # noqa: F401
-from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType  # noqa: F401
-# from .lazy_import import LazyImport
-os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
-# FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
-# SearchKey = LazyImport("upgini.metadata", "SearchKey")
-# RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
-# CVType = LazyImport("upgini.metadata", "CVType")
-# ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")