PyPI - upgini - Versions diffs - 1.1.275__py3-none-any.whl → 1.1.275a1__py3-none-any.whl - Mend

upgini 1.1.275__py3-none-any.whl → 1.1.275a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (23) hide show

upgini/ads.py +2 -6
upgini/autofe/date.py +2 -9
upgini/data_source/data_source_publisher.py +1 -1
upgini/dataset.py +13 -6
upgini/features_enricher.py +220 -154
upgini/metadata.py +9 -1
upgini/metrics.py +0 -12
upgini/normalizer/phone_normalizer.py +2 -2
upgini/resource_bundle/strings.properties +2 -2
upgini/utils/__init__.py +2 -3
upgini/utils/base_search_key_detector.py +14 -12
upgini/utils/country_utils.py +2 -2
upgini/utils/datetime_utils.py +4 -7
upgini/utils/deduplicate_utils.py +11 -1
upgini/utils/email_utils.py +7 -2
upgini/utils/features_validator.py +1 -2
upgini/utils/target_utils.py +1 -1
upgini/utils/track_info.py +13 -25
{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/METADATA +2 -2
{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/RECORD +23 -23
{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/LICENSE +0 -0
{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/WHEEL +0 -0
{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/top_level.txt +0 -0

upgini/metadata.py CHANGED Viewed

@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Set
 from pydantic import BaseModel
 SYSTEM_RECORD_ID = "system_record_id"
+ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
+SEARCH_KEY_UNNEST = "search_key_unnest"
 SORT_ID = "sort_id"
 EVAL_SET_INDEX = "eval_set_index"
 TARGET = "target"
@@ -11,7 +13,7 @@ COUNTRY = "country_iso_code"
 RENAMED_INDEX = "index_col"
 DEFAULT_INDEX = "index"
 ORIGINAL_INDEX = "original_index"
-SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
+SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
 class FileColumnMeaningType(Enum):
@@ -37,6 +39,8 @@ class FileColumnMeaningType(Enum):
     POSTAL_CODE = "POSTAL_CODE"
     SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
     EVAL_SET_INDEX = "EVAL_SET_INDEX"
+    ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
+    UNNEST_KEY = "UNNEST_KEY"
 class SearchKey(Enum):
@@ -182,6 +186,10 @@ class FileColumnMetadata(BaseModel):
     meaningType: FileColumnMeaningType
     minMaxValues: Optional[NumericInterval] = None
     originalName: Optional[str]
+    # is this column contains keys from multiple key columns like msisdn1, msisdn2
+    isUnnest: bool = False,
+    # list of original etalon key column names like msisdn1, msisdn2
+    unnestKeyNames: Optional[list[str]]
 class FileMetadata(BaseModel):

upgini/metrics.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import inspect
 import logging
 import re
 from copy import deepcopy
@@ -382,11 +381,6 @@ class EstimatorWrapper:
             kwargs["estimator"] = estimator_copy
             if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
                 if cat_features is not None:
-                    for cat_feature in cat_features:
-                        if cat_feature not in X.columns:
-                            logger.error(
-                                f"Client cat_feature `{cat_feature}` not found in X columns: {X.columns.to_list()}"
-                            )
                     estimator_copy.set_params(
                         cat_features=[X.columns.get_loc(cat_feature) for cat_feature in cat_features]
                     )
@@ -653,12 +647,6 @@ class OtherEstimatorWrapper(EstimatorWrapper):
 def validate_scoring_argument(scoring: Union[Callable, str, None]):
     if isinstance(scoring, str) and scoring is not None:
         _get_scorer_by_name(scoring)
-    elif isinstance(scoring, Callable):
-        spec = inspect.getfullargspec(scoring)
-        if len(spec.args) < 3:
-            raise ValidationError(
-                f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, X, y"
-            )
 def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:

upgini/normalizer/phone_normalizer.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from typing import Optional
 import pandas as pd
-from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype, is_object_dtype
+from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype
 from upgini.errors import ValidationError
@@ -44,7 +44,7 @@ class PhoneNormalizer:
         Method will remove all non numeric chars from string and convert it to int.
         None will be set for phone numbers that couldn"t be converted to int
         """
-        if is_string_dtype(self.df[self.phone_column_name]) or is_object_dtype(self.df[self.phone_column_name]):
+        if is_string_dtype(self.df[self.phone_column_name]):
             convert_func = self.phone_str_to_int_safe
         elif is_float_dtype(self.df[self.phone_column_name]):
             convert_func = self.phone_float_to_int_safe

upgini/resource_bundle/strings.properties CHANGED Viewed

@@ -38,7 +38,6 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
 loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
 multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
 group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
-current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
 # Errors
 failed_search_by_task_id=Failed to retrieve the specified search results
@@ -88,6 +87,7 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
 search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
 empty_search_key=Search key {} is empty. Please fill values or remove this search key
 single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
+unsupported_multi_key=Search key {} cannot be used multiple times
 unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
 date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
 invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -159,7 +159,7 @@ dataset_invalid_multiclass_target=Unexpected dtype of target for multiclass task
 dataset_invalid_regression_target=Unexpected dtype of target for regression task type: {}. Expected float
 dataset_invalid_timeseries_target=Unexpected dtype of target for timeseries task type: {}. Expected float
 dataset_to_many_multiclass_targets=The number of target classes {} exceeds the allowed threshold: {}. Please, correct your data and try again
-dataset_rarest_class_less_min=Count of rows with the rarest class `{}` is {}, minimum count must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
+dataset_rarest_class_less_min=Frequency of the rarest class `{}` is {}, minimum frequency must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
 dataset_rarest_class_less_threshold=\nWARNING: Target is imbalanced and will be undersampled to the rarest class. Frequency of the rarest class `{}` is {}\nMinimum number of observations for each class to avoid undersampling {} ({}%)
 dataset_date_features=\nWARNING: Columns {} is a datetime or period type but not used as a search key, removed from X
 dataset_too_many_features=Too many features. Maximum number of features is {}

upgini/utils/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@ import itertools
 from typing import List, Tuple
 import pandas as pd
-from pandas.api.types import is_string_dtype, is_object_dtype
+from pandas.api.types import is_string_dtype
 def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
@@ -20,6 +20,5 @@ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
     return [
         col
         for col in tmp.columns
-        if (is_string_dtype(tmp[col]) or is_object_dtype(tmp[col]))
-        and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
+        if is_string_dtype(tmp[col]) and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
     ]

upgini/utils/base_search_key_detector.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import List
 import pandas as pd
@@ -10,16 +10,18 @@ class BaseSearchKeyDetector:
     def _is_search_key_by_values(self, column: pd.Series) -> bool:
         raise NotImplementedError()
-    def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
-        for column_name in column_names:
-            if self._is_search_key_by_name(column_name):
-                return column_name
+    def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
+        return [
+            column_name
+            for column_name in column_names
+            if self._is_search_key_by_name(column_name)
+        ]
-    def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
-        maybe_column = self._get_search_key_by_name(df.columns.to_list())
-        if maybe_column is not None:
-            return maybe_column
-        for column_name in df.columns:
+    def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
+        other_columns = [col for col in df.columns if col not in existing_search_keys]
+        columns_by_names = self._get_search_keys_by_name(other_columns)
+        columns_by_values = []
+        for column_name in other_columns:
             if self._is_search_key_by_values(df[column_name]):
-                return column_name
+                columns_by_values.append(column_name)
+        return list(set(columns_by_names + columns_by_values))

upgini/utils/country_utils.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import pandas as pd
-from pandas.api.types import is_string_dtype, is_object_dtype
+from pandas.api.types import is_string_dtype
 from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
@@ -9,7 +9,7 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
         return "country" in str(column_name).lower()
     def _is_search_key_by_values(self, column: pd.Series) -> bool:
-        if not is_string_dtype(column) and not is_object_dtype(column):
+        if not is_string_dtype(column):
             return False
         all_count = len(column)

upgini/utils/datetime_utils.py CHANGED Viewed

@@ -6,10 +6,7 @@ from typing import Dict, List, Optional
 import numpy as np
 import pandas as pd
 from dateutil.relativedelta import relativedelta
-from pandas.api.types import (
-    is_numeric_dtype,
-    is_period_dtype,
-)
+from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
 from upgini.errors import ValidationError
 from upgini.metadata import SearchKey
@@ -81,6 +78,9 @@ class DateTimeSearchKeyConverter:
             df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
         elif isinstance(df[self.date_column].values[0], datetime.date):
             df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
+        elif is_string_dtype(df[self.date_column]):
+            df[self.date_column] = df[self.date_column].apply(self.clean_date)
+            df[self.date_column] = self.parse_date(df)
         elif is_period_dtype(df[self.date_column]):
             df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
         elif is_numeric_dtype(df[self.date_column]):
@@ -100,9 +100,6 @@ class DateTimeSearchKeyConverter:
                 msg = self.bundle.get("unsupported_date_type").format(self.date_column)
                 self.logger.warning(msg)
                 raise ValidationError(msg)
-        else:
-            df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
-            df[self.date_column] = self.parse_date(df)
         # If column with date is datetime then extract seconds of the day and minute of the hour
         # as additional features

upgini/utils/deduplicate_utils.py CHANGED Viewed

@@ -3,7 +3,15 @@ from typing import Dict, List, Optional, Union
 import pandas as pd
-from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
+from upgini.metadata import (
+    ENTITY_SYSTEM_RECORD_ID,
+    EVAL_SET_INDEX,
+    SORT_ID,
+    SYSTEM_RECORD_ID,
+    TARGET,
+    ModelTaskType,
+    SearchKey,
+)
 from upgini.resource_bundle import ResourceBundle
 from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
 from upgini.utils.target_utils import define_task
@@ -143,6 +151,8 @@ def clean_full_duplicates(
     unique_columns = df.columns.tolist()
     if SYSTEM_RECORD_ID in unique_columns:
         unique_columns.remove(SYSTEM_RECORD_ID)
+    if ENTITY_SYSTEM_RECORD_ID in unique_columns:
+        unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
     if SORT_ID in unique_columns:
         unique_columns.remove(SORT_ID)
     if EVAL_SET_INDEX in unique_columns:

upgini/utils/email_utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ from hashlib import sha256
 from typing import Dict, List, Optional
 import pandas as pd
-from pandas.api.types import is_string_dtype, is_object_dtype
+from pandas.api.types import is_string_dtype
 from upgini.resource_bundle import bundle
 from upgini.metadata import SearchKey
@@ -18,7 +18,7 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
         return str(column_name).lower() in ["email", "e_mail", "e-mail"]
     def _is_search_key_by_values(self, column: pd.Series) -> bool:
-        if not is_string_dtype(column) and not is_object_dtype:
+        if not is_string_dtype(column):
             return False
         if not column.astype("string").str.contains("@").any():
             return False
@@ -38,11 +38,13 @@ class EmailSearchKeyConverter:
         email_column: str,
         hem_column: Optional[str],
         search_keys: Dict[str, SearchKey],
+        unnest_search_keys: Optional[List[str]] = None,
         logger: Optional[logging.Logger] = None,
     ):
         self.email_column = email_column
         self.hem_column = hem_column
         self.search_keys = search_keys
+        self.unnest_search_keys = unnest_search_keys
         if logger is not None:
             self.logger = logger
         else:
@@ -80,9 +82,12 @@ class EmailSearchKeyConverter:
                 del self.search_keys[self.email_column]
                 return df
             self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
+            self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
             self.email_converted_to_hem = True
         del self.search_keys[self.email_column]
+        if self.email_column in self.unnest_search_keys:
+            self.unnest_search_keys.remove(self.email_column)
         df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)

upgini/utils/features_validator.py CHANGED Viewed

@@ -81,8 +81,7 @@ class FeaturesValidator:
         return [
             i
             for i in df
-            if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
-            and (df[i].nunique(dropna=False) / row_count >= 0.85)
+            if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique(dropna=False) / row_count >= 0.95)
         ]
     @staticmethod

upgini/utils/target_utils.py CHANGED Viewed

@@ -107,7 +107,7 @@ def balance_undersample(
     min_class_count = vc[min_class_value]
     min_class_percent = imbalance_threshold / target_classes_count
-    min_class_threshold = int(min_class_percent * count)
+    min_class_threshold = min_class_percent * count
     resampled_data = df
     df = df.copy().sort_values(by=SYSTEM_RECORD_ID)

upgini/utils/track_info.py CHANGED Viewed

@@ -55,7 +55,7 @@ def _get_execution_ide() -> str:
 def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
     # default values
     track = {"ide": _get_execution_ide()}
-    ident_res = "https://api64.ipify.org"
+    ident_res = "https://api.ipify.org"
     try:
         track["hostname"] = socket.gethostname()
@@ -74,20 +74,17 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
             display(
                 Javascript(
                     """
-                    async function getVisitorId() {
-                        return import('https://upgini.github.io/upgini/js/a.js')
+                        import('https://upgini.github.io/upgini/js/a.js')
                             .then(FingerprintJS => FingerprintJS.load())
                             .then(fp => fp.get())
-                            .then(result => result.visitorId);
-                    }
+                            .then(result => window.visitorId = result.visitorId);
                     """
                 )
             )
-            track["visitorId"] = output.eval_js("getVisitorId()", timeout_sec=30)
+            track["visitorId"] = output.eval_js("window.visitorId", timeout_sec=10)
         except Exception as e:
             track["err"] = str(e)
-            if "visitorId" not in track:
-                track["visitorId"] = "None"
+            track["visitorId"] = "None"
         if client_ip:
             track["ip"] = client_ip
         else:
@@ -98,19 +95,16 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
                 display(
                     Javascript(
                         f"""
-                        async function getIP() {{
-                            return fetch("{ident_res}")
+                            fetch("{ident_res}")
                                 .then(response => response.text())
-                                .then(data => data);
-                        }}
+                                .then(data => window.clientIP = data);
                         """
                     )
                 )
-                track["ip"] = output.eval_js("getIP()", timeout_sec=10)
+                track["ip"] = output.eval_js("window.clientIP", timeout_sec=10)
             except Exception as e:
                 track["err"] = str(e)
-                if "ip" not in track:
-                    track["ip"] = "0.0.0.0"
+                track["ip"] = "0.0.0.0"
     elif track["ide"] == "binder":
         try:
@@ -122,10 +116,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
                 track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
         except Exception as e:
             track["err"] = str(e)
-            if "ip" not in track:
-                track["ip"] = "0.0.0.0"
-            if "visitorId" not in track:
-                track["visitorId"] = "None"
+            track["ip"] = "0.0.0.0"
+            track["visitorId"] = "None"
     elif track["ide"] == "kaggle":
         try:
@@ -144,8 +136,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
                     raise Exception(err)
         except Exception as e:
             track["err"] = str(e)
-            if "visitorId" not in track:
-                track["visitorId"] = "None"
+            track["ip"] = "0.0.0.0"
+            track["visitorId"] = "None"
     else:
         try:
             if client_ip:
@@ -158,9 +150,5 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
                 track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
         except Exception as e:
             track["err"] = str(e)
-            if "visitorId" not in track:
-                track["visitorId"] = "None"
-            if "ip" not in track:
-                track["ip"] = "0.0.0.0"
     return track

{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: upgini
-Version: 1.1.275
+Version: 1.1.275a1
 Summary: Intelligent data search & enrichment for Machine Learning
 Home-page: https://upgini.com/
 Author: Upgini Developers
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: python-dateutil >=2.8.0
 Requires-Dist: requests >=2.8.0
-Requires-Dist: pandas <3.0.0,>=1.1.0
+Requires-Dist: pandas <2.0.0,>=1.1.0
 Requires-Dist: numpy >=1.19.0
 Requires-Dist: scikit-learn >=1.3.0
 Requires-Dist: pydantic <2.0.0,>=1.8.2

{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/RECORD RENAMED Viewed

@@ -1,12 +1,12 @@
 upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
-upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
-upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
+upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
+upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
 upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
-upgini/features_enricher.py,sha256=XKN-SdzX5EHKJHiPWvmEGDiCy6iK2ZaNPw75DYfcev0,176176
+upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
 upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
 upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
-upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
-upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
+upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
+upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
 upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
 upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
 upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -15,49 +15,49 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
 upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
 upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
-upgini/autofe/date.py,sha256=408p8P2OTPM2D3LsEGGtaiCepKGgM1BbOCQNRzAmI6c,4223
+upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
 upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
 upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
 upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
 upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
 upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
+upgini/data_source/data_source_publisher.py,sha256=J2lrpPuysUHPeqTSfoybBtPRTBCFu7R5KzaakhjaRDc,16485
 upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
 upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
+upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
 upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
+upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
 upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
 upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
-upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
-upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
+upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
+upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
 upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
-upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
+upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
 upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
 upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
-upgini/utils/datetime_utils.py,sha256=_mfhWb5ogEThvanQ-py1Lb6VvUvF2vT20tQgNprNz6o,10321
-upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
+upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
+upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
 upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
-upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
+upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
 upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
-upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
+upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
 upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
 upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
 upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
 upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
 upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
 upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
-upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
-upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
+upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
+upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.1.275.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.1.275.dist-info/METADATA,sha256=6RZCJLAqN3qIrXOvyAaQIr75-TZw4NcLkp5yXS637ls,48156
-upgini-1.1.275.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-upgini-1.1.275.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
-upgini-1.1.275.dist-info/RECORD,,
+upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
+upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+upgini-1.1.275a1.dist-info/RECORD,,

{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/LICENSE RENAMED Viewed

File without changes

{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.1.275.dist-info → upgini-1.1.275a1.dist-info}/top_level.txt RENAMED Viewed

File without changes

upgini 1.1.275__py3-none-any.whl → 1.1.275a1__py3-none-any.whl

Potentially problematic release.

upgini 1.1.275__py3-none-any.whl → 1.1.275a1__py3-none-any.whl