upgini 1.1.261a3250.post2__py3-none-any.whl → 1.1.262a3250.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/autofe/date.py +27 -18
- upgini/autofe/feature.py +1 -1
- upgini/autofe/operand.py +2 -0
- upgini/autofe/unary.py +15 -8
- upgini/autofe/vector.py +5 -3
- upgini/data_source/data_source_publisher.py +10 -1
- upgini/dataset.py +20 -57
- upgini/features_enricher.py +1 -1
- upgini/search_task.py +1 -1
- upgini/utils/datetime_utils.py +1 -1
- upgini/utils/target_utils.py +111 -2
- {upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/METADATA +1 -1
- {upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/RECORD +16 -16
- {upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/LICENSE +0 -0
- {upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/WHEEL +0 -0
- {upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/top_level.txt +0 -0
upgini/autofe/date.py
CHANGED
@@ -1,42 +1,51 @@
+from typing import Optional, Union
 import numpy as np
 import pandas as pd
 
-from upgini.autofe.operand import PandasOperand
+from upgini.autofe.operand import PandasOperand
 
 
-class
+class DateDiffMixin:
+    diff_unit: str = "D"
+    left_unit: Optional[str] = None
+    right_unit: Optional[str] = None
+
+    def _convert_to_date(
+        self, x: Union[pd.DataFrame, pd.Series], unit: Optional[str]
+    ) -> Union[pd.DataFrame, pd.Series]:
+        if isinstance(x, pd.DataFrame):
+            return x.apply(lambda y: self._convert_to_date(y, unit), axis=1)
+
+        return pd.to_datetime(x, unit=unit)
+
+
+class DateDiff(PandasOperand, DateDiffMixin):
     name = "date_diff"
     is_binary = True
     has_symmetry_importance = True
-    is_vectorizable = True
-    unit: str = "D"
 
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
-
-
-
-        group_column, value_columns = self.validate_calculation(data.columns, **kwargs)
-        d1 = data[value_columns]
-        d2 = data[group_column]
-
-        return self.__replace_negative(d1.sub(d2, axis=0) / np.timedelta64(1, self.unit))
+        left = self._convert_to_date(left, self.left_unit)
+        right = self._convert_to_date(right, self.right_unit)
+        return self.__replace_negative((left - right) / np.timedelta64(1, self.diff_unit))
 
-    def __replace_negative(self,
-
-        return
+    def __replace_negative(self, x: Union[pd.DataFrame, pd.Series]):
+        x[x < 0] = None
+        return x
 
 
-class DateDiffFuture(PandasOperand):
+class DateDiffFuture(PandasOperand, DateDiffMixin):
     name = "date_diff_future"
     is_binary = True
     has_symmetry_importance = True
     is_vectorizable = False
-    unit: str = "D"
 
     def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
+        left = self._convert_to_date(left, self.left_unit)
+        right = self._convert_to_date(right, self.right_unit)
         future = pd.to_datetime(dict(day=right.dt.day, month=right.dt.month, year=left.dt.year))
         before = future[future < left]
         future[future < left] = pd.to_datetime(dict(day=before.dt.day, month=before.dt.month, year=before.dt.year + 1))
-        diff = (future - left) / np.timedelta64(1, self.
+        diff = (future - left) / np.timedelta64(1, self.diff_unit)
 
         return diff
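Note: the refactoring above moves the conversion units into DateDiffMixin, which both DateDiff and DateDiffFuture now inherit. A minimal standalone sketch of what that shared logic computes, assuming epoch-seconds inputs (the sample values and unit are illustrative, not from the package):

    import numpy as np
    import pandas as pd

    # Two hypothetical timestamp columns stored as epoch seconds.
    left = pd.Series([1672531200, 1675209600])   # 2023-01-01, 2023-02-01
    right = pd.Series([1640995200, 1643673600])  # 2022-01-01, 2022-02-01

    # DateDiffMixin._convert_to_date on a Series boils down to pd.to_datetime with a unit.
    left_dt = pd.to_datetime(left, unit="s")
    right_dt = pd.to_datetime(right, unit="s")

    # DateDiff.calculate_binary: subtract and express the delta in diff_unit (default "D").
    diff_days = (left_dt - right_dt) / np.timedelta64(1, "D")
    # __replace_negative masks negative differences.
    diff_days[diff_days < 0] = None
    print(diff_days.tolist())  # [365.0, 365.0]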
upgini/autofe/feature.py
CHANGED
@@ -305,7 +305,7 @@ class FeatureGroup:
         grouped_features = []
 
         def groupby_func(f: Feature) -> Tuple[Operand, Union[Column, Feature]]:
-            return (f.op, f.children[0
+            return (f.op, f.children[0 if not f.op.is_vectorizable else f.op.group_index])
 
         for op_child, features in itertools.groupby(candidates, groupby_func):
             op, main_child = op_child
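The key-function change above means vectorizable operands now group their candidate features by the child at group_index instead of always by the first child. A rough sketch of that keying with stand-in classes (Feat, Op and the operand names are simplified placeholders, not the upgini implementations):

    import itertools
    from dataclasses import dataclass
    from typing import List

    @dataclass(frozen=True)
    class Op:
        name: str
        is_vectorizable: bool = False
        group_index: int = 0

    @dataclass
    class Feat:
        op: Op
        children: List[str]

    def groupby_key(f: Feat):
        # Non-vectorizable ops group by the first child; vectorizable ops by group_index.
        return (f.op, f.children[0 if not f.op.is_vectorizable else f.op.group_index])

    group_op = Op("group_by_then_min", is_vectorizable=True, group_index=1)
    feats = [
        Feat(Op("date_diff"), ["signup_date", "purchase_date"]),
        Feat(group_op, ["amount", "user_id"]),
        Feat(group_op, ["clicks", "user_id"]),
    ]
    feats.sort(key=lambda f: (f.op.name, groupby_key(f)[1]))
    for (op, main_child), group in itertools.groupby(feats, key=groupby_key):
        print(op.name, main_child, len(list(group)))
    # date_diff signup_date 1
    # group_by_then_min user_id 2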
upgini/autofe/operand.py
CHANGED
@@ -73,6 +73,8 @@ class PandasOperand(Operand, abc.ABC):
 
 
 class VectorizableMixin(Operand):
+    group_index: int = 1
+
     def validate_calculation(self, input_columns: List[str], **kwargs) -> Tuple[str, List[str]]:
         if not kwargs.get(MAIN_COLUMN):
             raise ValueError(f"Expected argument {MAIN_COLUMN} for grouping operator {self.name} not found")
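For context, group_index marks which child of a vectorizable operand is the grouping column; the default of 1 presumably suits the group-by operands, while the unary and vector operands below override it to 0. A hedged sketch of how the validation shown above might be used (the MAIN_COLUMN value and everything after the raise are assumptions; only the check itself mirrors the diff):

    from typing import List, Tuple

    MAIN_COLUMN = "main_column"  # placeholder; the real constant is imported in operand.py

    class VectorizableSketch:
        name = "group_by_then_min"
        group_index: int = 1  # which child is the grouping column

        def validate_calculation(self, input_columns: List[str], **kwargs) -> Tuple[str, List[str]]:
            if not kwargs.get(MAIN_COLUMN):
                raise ValueError(f"Expected argument {MAIN_COLUMN} for grouping operator {self.name} not found")
            group_column = kwargs[MAIN_COLUMN]  # assumed: the rest of the method is not shown in the diff
            value_columns = [c for c in input_columns if c != group_column]
            return group_column, value_columns

    print(VectorizableSketch().validate_calculation(["user_id", "amount"], main_column="user_id"))
    # ('user_id', ['amount'])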
upgini/autofe/unary.py
CHANGED
@@ -1,12 +1,13 @@
-from upgini.autofe.operand import PandasOperand
+from upgini.autofe.operand import PandasOperand, VectorizableMixin
 import numpy as np
 import pandas as pd
 
 
-class Abs(PandasOperand):
+class Abs(PandasOperand, VectorizableMixin):
     name = "abs"
     is_unary = True
     is_vectorizable = True
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return data.abs()
@@ -15,11 +16,12 @@ class Abs(PandasOperand):
         return data.abs()
 
 
-class Log(PandasOperand):
+class Log(PandasOperand, VectorizableMixin):
     name = "log"
     is_unary = True
     is_vectorizable = True
     output_type = "float"
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(np.log(np.abs(data.replace(0, np.nan))), 10)
@@ -28,11 +30,12 @@ class Log(PandasOperand):
         return self._round_value(np.log(data.replace(0, np.nan).abs()), 10)
 
 
-class Sqrt(PandasOperand):
+class Sqrt(PandasOperand, VectorizableMixin):
     name = "sqrt"
     is_unary = True
     is_vectorizable = True
     output_type = "float"
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(np.sqrt(np.abs(data)))
@@ -41,10 +44,11 @@ class Sqrt(PandasOperand):
         return self._round_value(np.sqrt(data.abs()))
 
 
-class Square(PandasOperand):
+class Square(PandasOperand, VectorizableMixin):
     name = "square"
     is_unary = True
     is_vectorizable = True
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return np.square(data)
@@ -53,11 +57,12 @@ class Square(PandasOperand):
         return np.square(data)
 
 
-class Sigmoid(PandasOperand):
+class Sigmoid(PandasOperand, VectorizableMixin):
     name = "sigmoid"
     is_unary = True
     is_vectorizable = True
     output_type = "float"
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return self._round_value(1 / (1 + np.exp(-data)))
@@ -66,12 +71,13 @@ class Sigmoid(PandasOperand):
         return self._round_value(1 / (1 + np.exp(-data)))
 
 
-class Floor(PandasOperand):
+class Floor(PandasOperand, VectorizableMixin):
     name = "floor"
     is_unary = True
     is_vectorizable = True
     output_type = "int"
     input_type = "continuous"
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return np.floor(data)
@@ -80,11 +86,12 @@ class Floor(PandasOperand):
         return np.floor(data)
 
 
-class Residual(PandasOperand):
+class Residual(PandasOperand, VectorizableMixin):
     name = "residual"
     is_unary = True
     is_vectorizable = True
     input_type = "continuous"
+    group_index = 0
 
     def calculate_unary(self, data: pd.Series) -> pd.Series:
         return data - np.floor(data)
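All of the unary operands above keep their per-Series math; only the mixin and the group_index attribute change. As a reference point, here is the Log transform from the diff applied with plain pandas/NumPy (np.round stands in for the package's _round_value helper):

    import numpy as np
    import pandas as pd

    s = pd.Series([0.0, 1.0, -np.e, 100.0])
    # Same expression as Log.calculate_unary: zeros masked to NaN, log of the absolute value, rounded.
    log_series = np.round(np.log(np.abs(s.replace(0, np.nan))), 10)
    print(log_series.tolist())  # [nan, 0.0, 1.0, 4.605170186]

    # The same expression also works column-wise on a DataFrame.
    df = pd.DataFrame({"a": s, "b": s * 2})
    print(np.round(np.log(df.replace(0, np.nan).abs()), 10))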
upgini/autofe/vector.py
CHANGED
@@ -1,20 +1,22 @@
 from typing import List
 import pandas as pd
-from upgini.autofe.operand import PandasOperand
+from upgini.autofe.operand import PandasOperand, VectorizableMixin
 
 
-class Mean(PandasOperand):
+class Mean(PandasOperand, VectorizableMixin):
     name = "mean"
     output_type = "float"
     is_vector = True
+    group_index = 0
 
     def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
         return pd.DataFrame(data).T.fillna(0).mean(axis=1)
 
 
-class Sum(PandasOperand):
+class Sum(PandasOperand, VectorizableMixin):
     name = "sum"
     is_vector = True
+    group_index = 0
 
     def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
         return pd.DataFrame(data).T.fillna(0).sum(axis=1)
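Mean and Sum still receive a list of aligned Series; a quick sketch of what pd.DataFrame(data).T.fillna(0) produces before the row-wise aggregation:

    import pandas as pd

    data = [pd.Series([1.0, 2.0, None]), pd.Series([3.0, None, 5.0])]
    stacked = pd.DataFrame(data).T.fillna(0)  # one row per original index, one column per input Series
    print(stacked.mean(axis=1).tolist())  # [2.0, 1.0, 2.5]
    print(stacked.sum(axis=1).tolist())   # [4.0, 2.0, 5.0]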
upgini/data_source/data_source_publisher.py
CHANGED
@@ -40,7 +40,7 @@ class DataSourcePublisher:
         if logs_enabled:
             self.logger = LoggerFactory().get_logger(endpoint, api_key)
         else:
-            self.logger = logging.getLogger()
+            self.logger = logging.getLogger("muted_logger")
             self.logger.setLevel("FATAL")
 
     def place(
@@ -170,6 +170,7 @@ class DataSourcePublisher:
                 print(msg)
                 self.logger.info(msg)
                 self._rest_client.stop_ads_management_task(task_id, trace_id)
+                raise
         except Exception:
             self.logger.exception("Failed to register data table")
             raise
@@ -289,6 +290,7 @@ class DataSourcePublisher:
             raise ValidationError("One of arguments: bq_table_id or search_keys should be presented")
         if bq_table_id is not None and search_keys is not None:
             raise ValidationError("Only one argument could be presented: bq_table_id or search_keys")
+        task_id = None
         try:
            search_keys = [k.value.value for k in search_keys] if search_keys else None
             request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
@@ -303,6 +305,13 @@ class DataSourcePublisher:
                 raise Exception("Failed to register ADS: " + status_response["errorMessage"])
 
             print("Uploading successfully finished")
+        except KeyboardInterrupt:
+            if task_id is not None:
+                msg = f"Stopping AdsManagementTask {task_id}"
+                print(msg)
+                self.logger.info(msg)
+                self._rest_client.stop_ads_management_task(task_id, trace_id)
+            raise
         except Exception:
             self.logger.exception(f"Failed to upload table {bq_table_id}")
             raise
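The new task_id initialization plus the KeyboardInterrupt handler mean an interrupted upload now stops the remote AdsManagementTask before propagating the interrupt. A generic sketch of that cleanup pattern (start_task, poll_task and stop_task are placeholders, not the upgini REST client API):

    def run_with_cleanup(start_task, poll_task, stop_task):
        task_id = None  # initialized before the try so the except block can check it
        try:
            task_id = start_task()
            poll_task(task_id)
        except KeyboardInterrupt:
            if task_id is not None:
                print(f"Stopping task {task_id}")
                stop_task(task_id)
            raise  # re-raise so the caller still sees the interrupt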
upgini/dataset.py
CHANGED
@@ -39,10 +39,10 @@ from upgini.metadata import (
 )
 from upgini.normalizer.phone_normalizer import PhoneNormalizer
 from upgini.resource_bundle import ResourceBundle, get_custom_bundle
-from upgini.sampler.random_under_sampler import RandomUnderSampler
 from upgini.search_task import SearchTask
 from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
 from upgini.utils.email_utils import EmailSearchKeyConverter
+from upgini.utils.target_utils import balance_undersample
 
 try:
     from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
@@ -61,6 +61,8 @@ class Dataset: # (pd.DataFrame):
     FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
     MIN_SAMPLE_THRESHOLD = 5_000
     IMBALANCE_THESHOLD = 0.4
+    BINARY_BOOTSTRAP_LOOPS = 5
+    MULTICLASS_BOOTSTRAP_LOOPS = 2
     MIN_TARGET_CLASS_ROWS = 100
     MAX_MULTICLASS_CLASS_COUNT = 100
     MIN_SUPPORTED_DATE_TS = 946684800000  # 2000-01-01
@@ -460,10 +462,8 @@
             self.task_type == ModelTaskType.BINARY and len(train_segment) > self.MIN_SAMPLE_THRESHOLD
         ):
             count = len(train_segment)
-
-
-            target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, "")
-            target = train_segment[target_column].copy()
+            target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, TARGET)
+            target = train_segment[target_column]
             target_classes_count = target.nunique()
 
             if target_classes_count > self.MAX_MULTICLASS_CLASS_COUNT:
@@ -473,12 +473,9 @@
                 self.logger.warning(msg)
                 raise ValidationError(msg)
 
-
-
-
-                if current_class_count < min_class_count:
-                    min_class_count = current_class_count
-                    min_class_value = v
+            vc = target.value_counts()
+            min_class_value = vc.index[len(vc) - 1]
+            min_class_count = vc[min_class_value]
 
             if min_class_count < self.MIN_TARGET_CLASS_ROWS:
                 msg = self.bundle.get("dataset_rarest_class_less_min").format(
@@ -491,53 +488,19 @@
             min_class_threshold = min_class_percent * count
 
             if min_class_count < min_class_threshold:
-                msg = self.bundle.get("dataset_rarest_class_less_threshold").format(
-                    min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
-                )
-                self.logger.warning(msg)
-                print(msg)
-                self.warning_counter.increment()
-
-                train_segment = train_segment.copy().sort_values(by=SYSTEM_RECORD_ID)
-                if self.task_type == ModelTaskType.MULTICLASS:
-                    # Sort classes by rows count and find 25% quantile class
-                    classes = target.value_counts().index
-                    quantile25_idx = int(0.75 * len(classes))
-                    quantile25_class = classes[quantile25_idx]
-                    count_of_quantile25_class = len(target[target == quantile25_class])
-                    msg = self.bundle.get("imbalance_multiclass").format(quantile25_class, count_of_quantile25_class)
-                    self.logger.warning(msg)
-                    print(msg)
-                    # 25% and lower classes will stay as is. Higher classes will be downsampled
-                    parts = []
-                    for class_idx in range(quantile25_idx):
-                        sampled = train_segment[train_segment[target_column] == classes[class_idx]].sample(
-                            n=count_of_quantile25_class, random_state=self.random_state
-                        )
-                        parts.append(sampled)
-                    for class_idx in range(quantile25_idx, len(classes)):
-                        parts.append(train_segment[train_segment[target_column] == classes[class_idx]])
-                    resampled_data = pd.concat(parts)
-                elif self.task_type == ModelTaskType.BINARY and min_class_count < self.MIN_SAMPLE_THRESHOLD / 2:
-                    minority_class = train_segment[train_segment[target_column] == min_class_value]
-                    majority_class = train_segment[train_segment[target_column] != min_class_value]
-                    sampled_majority_class = majority_class.sample(
-                        n=self.MIN_SAMPLE_THRESHOLD - min_class_count, random_state=self.random_state
-                    )
-                    resampled_data = train_segment[
-                        (train_segment[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
-                        | (train_segment[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
-                    ]
-                else:
-                    sampler = RandomUnderSampler(random_state=self.random_state)
-                    X = train_segment[SYSTEM_RECORD_ID]
-                    X = X.to_frame(SYSTEM_RECORD_ID)
-                    new_x, _ = sampler.fit_resample(X, target)  # type: ignore
-                    resampled_data = train_segment[train_segment[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
-
-                self.data = resampled_data
-                self.logger.info(f"Shape after rebalance resampling: {self.data.shape}")
                 self.imbalanced = True
+                self.data = balance_undersample(
+                    df=train_segment,
+                    target_column=target_column,
+                    task_type=self.task_type,
+                    random_state=self.random_state,
+                    imbalance_threshold=self.IMBALANCE_THESHOLD,
+                    binary_bootstrap_loops=self.BINARY_BOOTSTRAP_LOOPS,
+                    multiclass_bootstrap_loops=self.MULTICLASS_BOOTSTRAP_LOOPS,
+                    logger=self.logger,
+                    bundle=self.bundle,
+                    warning_counter=self.warning_counter,
+                )
 
         # Resample over fit threshold
         if not self.imbalanced and EVAL_SET_INDEX in self.data.columns:
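The rarest-class bookkeeping that used to be inlined here now relies on value_counts(), which sorts classes by frequency in descending order, so the last index entry is the rarest class; the resampling itself moves to balance_undersample in upgini/utils/target_utils.py. A small illustration of the lookup:

    import pandas as pd

    target = pd.Series([0, 0, 0, 0, 1, 1, 2])
    vc = target.value_counts()                # sorted by count, descending
    min_class_value = vc.index[len(vc) - 1]   # rarest class label
    min_class_count = vc[min_class_value]
    print(min_class_value, min_class_count)   # 2 1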
upgini/features_enricher.py
CHANGED
@@ -220,7 +220,7 @@ class FeaturesEnricher(TransformerMixin):
         if logs_enabled:
             self.logger = LoggerFactory().get_logger(endpoint, self._api_key, client_ip, client_visitorid)
         else:
-            self.logger = logging.getLogger()
+            self.logger = logging.getLogger("muted_logger")
             self.logger.setLevel("FATAL")
 
         if len(kwargs) > 0:
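The logger change is subtle but important: logging.getLogger() with no name returns the root logger, so setting it to FATAL muted logging for the entire process; a dedicated named logger confines the muting to upgini. A short illustration:

    import logging

    logging.basicConfig(level=logging.INFO)

    muted = logging.getLogger("muted_logger")  # only this named logger is silenced
    muted.setLevel("FATAL")

    logging.getLogger("some.other.module").info("still visible")  # root level is untouched
    muted.info("suppressed")  # below FATAL on the muted logger, so it is dropped

The same change appears in data_source_publisher.py above and in search_task.py and datetime_utils.py below.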
upgini/search_task.py
CHANGED
@@ -57,7 +57,7 @@ class SearchTask:
         if logger is not None:
             self.logger = logger
         else:
-            self.logger = logging.getLogger()
+            self.logger = logging.getLogger("muted_logger")
             self.logger.setLevel("FATAL")
         self.provider_metadata_v2: Optional[List[ProviderTaskMetadataV2]] = None
         self.unused_features_for_generation: Optional[List[str]] = None
upgini/utils/datetime_utils.py
CHANGED
@@ -44,7 +44,7 @@ class DateTimeSearchKeyConverter:
         if logger is not None:
             self.logger = logger
         else:
-            self.logger = logging.getLogger()
+            self.logger = logging.getLogger("muted_logger")
             self.logger.setLevel("FATAL")
         self.generated_features: List[str] = []
         self.bundle = bundle or get_custom_bundle()
upgini/utils/target_utils.py
CHANGED
@@ -6,8 +6,10 @@ import pandas as pd
 from pandas.api.types import is_numeric_dtype
 
 from upgini.errors import ValidationError
-from upgini.metadata import ModelTaskType
-from upgini.resource_bundle import bundle
+from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
+from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
+from upgini.sampler.random_under_sampler import RandomUnderSampler
+from upgini.utils.warning_counter import WarningCounter
 
 
 def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
@@ -72,3 +74,110 @@ def is_int_encoding(unique_values):
     return set(unique_values) == set(range(len(unique_values))) or set(unique_values) == set(
         range(1, len(unique_values) + 1)
     )
+
+
+def balance_undersample(
+    df: pd.DataFrame,
+    target_column: str,
+    task_type: ModelTaskType,
+    random_state: int,
+    imbalance_threshold: int = 0.2,
+    min_sample_threshold: int = 5000,
+    binary_bootstrap_loops: int = 5,
+    multiclass_bootstrap_loops: int = 2,
+    logger: Optional[logging.Logger] = None,
+    bundle: Optional[ResourceBundle] = None,
+    warning_counter: Optional[WarningCounter] = None,
+) -> pd.DataFrame:
+    if logger is None:
+        logger = logging.getLogger("muted_logger")
+        logger.setLevel("FATAL")
+    bundle = bundle or get_custom_bundle()
+    if SYSTEM_RECORD_ID not in df.columns:
+        raise Exception("System record id must be presented for undersampling")
+
+    count = len(df)
+    target = df[target_column].copy()
+    target_classes_count = target.nunique()
+
+    vc = target.value_counts()
+    max_class_value = vc.index[0]
+    min_class_value = vc.index[len(vc) - 1]
+    max_class_count = vc[max_class_value]
+    min_class_count = vc[min_class_value]
+
+    min_class_percent = imbalance_threshold / target_classes_count
+    min_class_threshold = min_class_percent * count
+
+    resampled_data = df
+    df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
+    if task_type == ModelTaskType.MULTICLASS:
+        # Sort classes by rows count and find 25% quantile class
+        classes = vc.index
+        quantile25_idx = int(0.75 * len(classes)) - 1
+        quantile25_class = classes[quantile25_idx]
+        quantile25_class_cnt = vc[quantile25_class]
+
+        if max_class_count > (quantile25_class_cnt * multiclass_bootstrap_loops):
+            msg = bundle.get("imbalance_multiclass").format(quantile25_class, quantile25_class_cnt)
+            logger.warning(msg)
+            print(msg)
+            if warning_counter:
+                warning_counter.increment()
+
+            # 25% and lower classes will stay as is. Higher classes will be downsampled
+            sample_strategy = dict()
+            for class_idx in range(quantile25_idx):
+                # compare class count with count_of_quantile25_class * 2
+                class_value = classes[class_idx]
+                class_count = vc[class_value]
+                sample_strategy[class_value] = min(class_count, quantile25_class_cnt * multiclass_bootstrap_loops)
+            sampler = RandomUnderSampler(
+                sampling_strategy=sample_strategy, random_state=random_state
+            )
+            X = df[SYSTEM_RECORD_ID]
+            X = X.to_frame(SYSTEM_RECORD_ID)
+            new_x, _ = sampler.fit_resample(X, target)  # type: ignore
+
+            resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
+    elif len(df) > min_sample_threshold and min_class_count < min_sample_threshold / 2:
+        msg = bundle.get("dataset_rarest_class_less_threshold").format(
+            min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
+        )
+        logger.warning(msg)
+        print(msg)
+        if warning_counter:
+            warning_counter.increment()
+
+        # fill up to min_sample_threshold by majority class
+        minority_class = df[df[target_column] == min_class_value]
+        majority_class = df[df[target_column] != min_class_value]
+        sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
+        sampled_majority_class = majority_class.sample(
+            n=sample_size, random_state=random_state
+        )
+        resampled_data = df[
+            (df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
+            | (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
+        ]
+
+    elif max_class_count > min_class_count * binary_bootstrap_loops:
+        msg = bundle.get("dataset_rarest_class_less_threshold").format(
+            min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
+        )
+        logger.warning(msg)
+        print(msg)
+        if warning_counter:
+            warning_counter.increment()
+
+        sampler = RandomUnderSampler(
+            sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
+        )
+        X = df[SYSTEM_RECORD_ID]
+        X = X.to_frame(SYSTEM_RECORD_ID)
+        new_x, _ = sampler.fit_resample(X, target)  # type: ignore
+
+        resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
+
+    logger.info(f"Shape after rebalance resampling: {resampled_data}")
+    return resampled_data
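To see what the new binary branch aims for, here is a pandas-only sketch that reproduces its target counts: when the majority class exceeds binary_bootstrap_loops times the minority, it is downsampled to exactly that multiple (the column names and data are made up; the package itself uses its vendored RandomUnderSampler keyed on SYSTEM_RECORD_ID):

    import pandas as pd

    binary_bootstrap_loops = 5
    df = pd.DataFrame({
        "system_record_id": range(10_000),
        "target": [1] * 500 + [0] * 9_500,  # class 0 dominates
    })

    vc = df["target"].value_counts()
    max_class_value, min_class_value = vc.index[0], vc.index[-1]
    max_class_count, min_class_count = vc.iloc[0], vc.iloc[-1]

    if max_class_count > min_class_count * binary_bootstrap_loops:
        # Keep every minority row; cap the majority at binary_bootstrap_loops * minority count.
        majority = df[df["target"] == max_class_value].sample(
            n=binary_bootstrap_loops * min_class_count, random_state=42
        )
        minority = df[df["target"] == min_class_value]
        resampled = pd.concat([minority, majority]).sort_values("system_record_id")
        print(resampled["target"].value_counts().to_dict())  # {0: 2500, 1: 500}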
{upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
 upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
-upgini/dataset.py,sha256=
+upgini/dataset.py,sha256=4LfrUwxhd__ZVqZkjPVxbC4SW3YLsk1sMMqnYPUaVpw,45529
 upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
-upgini/features_enricher.py,sha256=
+upgini/features_enricher.py,sha256=WbwnLvPVqn4m995b6jSamWkXyRVy18fnG7faBeuJbWI,172132
 upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
 upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
 upgini/metrics.py,sha256=3VvSZW1cCOIPHImXuqcnWzD3fWcpPzVa9k8eulLbUmY,27426
-upgini/search_task.py,sha256=
+upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
 upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
 upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
 upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
@@ -14,14 +14,14 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
 upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/autofe/all_operands.py,sha256=KWAdcYv6cToc6NZPcCmz6P3N8Nwjp8UqojKuz-f2BZY,1589
 upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
-upgini/autofe/date.py,sha256=
-upgini/autofe/feature.py,sha256=
+upgini/autofe/date.py,sha256=AC7Gabc7x2n4-_EmO1Q-7ncfCI_5-kPMQ3r3vFgQ1g4,1788
+upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
 upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
-upgini/autofe/operand.py,sha256=
-upgini/autofe/unary.py,sha256=
-upgini/autofe/vector.py,sha256=
+upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
+upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
+upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/data_source/data_source_publisher.py,sha256=
+upgini/data_source/data_source_publisher.py,sha256=QASEDhJ9SxJKcWxoN2vUPxrM_HTlwKQOPa92L7EQneA,15962
 upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
 upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -40,7 +40,7 @@ upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6P
 upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
 upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
 upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
-upgini/utils/datetime_utils.py,sha256=
+upgini/utils/datetime_utils.py,sha256=ol5Bgh98wU6KBY9z4QskNO0ja-L7HJL70HmTAjl7iRU,8836
 upgini/utils/deduplicate_utils.py,sha256=ckJrpU8Ruc_vcwIPTopbUjyJuNiseLHNAbQlLfhUCxo,5888
 upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
 upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
@@ -52,11 +52,11 @@ upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,4
 upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
 upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
 upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
-upgini/utils/target_utils.py,sha256=
+upgini/utils/target_utils.py,sha256=WVhhxpQVvnhsDV7ctlds51VFg7hz59S_MFUSoRZFszw,7204
 upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.1.
-upgini-1.1.
-upgini-1.1.
-upgini-1.1.
-upgini-1.1.
+upgini-1.1.262a3250.post4.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.262a3250.post4.dist-info/METADATA,sha256=XfUGTmbya5IYq0uJYXwhUGxBy9DAnrQyWvNsyiZl6gM,48167
+upgini-1.1.262a3250.post4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+upgini-1.1.262a3250.post4.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+upgini-1.1.262a3250.post4.dist-info/RECORD,,
{upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/LICENSE
File without changes

{upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/WHEEL
File without changes

{upgini-1.1.261a3250.post2.dist-info → upgini-1.1.262a3250.post4.dist-info}/top_level.txt
File without changes