PyPI - upgini - Versions diffs - 1.1.280.dev0__py3-none-any.whl → 1.2.31__py3-none-any.whl - Mend

upgini 1.1.280.dev0__py3-none-any.whl → 1.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of upgini might be problematic. Click here for more details.

Files changed (43)

upgini/__about__.py +1 -1
upgini/__init__.py +4 -20
upgini/autofe/all_operands.py +39 -9
upgini/autofe/binary.py +148 -45
upgini/autofe/date.py +197 -26
upgini/autofe/feature.py +102 -19
upgini/autofe/groupby.py +22 -22
upgini/autofe/operand.py +9 -6
upgini/autofe/unary.py +83 -41
upgini/autofe/vector.py +8 -8
upgini/data_source/data_source_publisher.py +128 -5
upgini/dataset.py +50 -386
upgini/features_enricher.py +931 -542
upgini/http.py +27 -16
upgini/lazy_import.py +35 -0
upgini/metadata.py +84 -59
upgini/metrics.py +164 -34
upgini/normalizer/normalize_utils.py +197 -0
upgini/resource_bundle/strings.properties +66 -51
upgini/search_task.py +10 -4
upgini/utils/Roboto-Regular.ttf +0 -0
upgini/utils/base_search_key_detector.py +14 -12
upgini/utils/country_utils.py +16 -0
upgini/utils/custom_loss_utils.py +39 -36
upgini/utils/datetime_utils.py +98 -45
upgini/utils/deduplicate_utils.py +135 -112
upgini/utils/display_utils.py +46 -15
upgini/utils/email_utils.py +54 -16
upgini/utils/feature_info.py +172 -0
upgini/utils/features_validator.py +34 -20
upgini/utils/ip_utils.py +100 -1
upgini/utils/phone_utils.py +343 -0
upgini/utils/postal_code_utils.py +34 -0
upgini/utils/sklearn_ext.py +28 -19
upgini/utils/target_utils.py +113 -57
upgini/utils/warning_counter.py +1 -0
upgini/version_validator.py +8 -4
{upgini-1.1.280.dev0.dist-info → upgini-1.2.31.dist-info}/METADATA +31 -16
upgini-1.2.31.dist-info/RECORD +65 -0
upgini/normalizer/phone_normalizer.py +0 -340
upgini-1.1.280.dev0.dist-info/RECORD +0 -62
{upgini-1.1.280.dev0.dist-info → upgini-1.2.31.dist-info}/WHEEL +0 -0
{upgini-1.1.280.dev0.dist-info → upgini-1.2.31.dist-info}/licenses/LICENSE +0 -0

upgini/utils/target_utils.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Optional, Union
 import numpy as np
 import pandas as pd
-from pandas.api.types import is_numeric_dtype
+from pandas.api.types import is_numeric_dtype, is_bool_dtype
 from upgini.errors import ValidationError
 from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
@@ -24,49 +24,83 @@ def define_task(
 ) -> ModelTaskType:
     if logger is None:
         logger = logging.getLogger()
+    # Replace inf and -inf with NaN to handle extreme values correctly
+    y = y.replace([np.inf, -np.inf], np.nan, inplace=False)
+    # Drop NaN values from the target
     target = y.dropna()
+    # Check if target is numeric and finite
     if is_numeric_dtype(target):
         target = target.loc[np.isfinite(target)]
     else:
+        # If not numeric, drop empty strings as well
         target = target.loc[target != ""]
+    # Raise error if there are no valid values left in the target
     if len(target) == 0:
         raise ValidationError(bundle.get("empty_target"))
+    # Count unique values in the target
     target_items = target.nunique()
+    # Raise error if all target values are the same
     if target_items == 1:
         raise ValidationError(bundle.get("dataset_constant_target"))
+    reason = ""  # Will store the reason for selecting the task type
+    # Binary classification case: exactly two unique values
     if target_items == 2:
         task = ModelTaskType.BINARY
+        reason = bundle.get("binary_target_reason")
     else:
+        # Attempt to convert target to numeric
         try:
             target = pd.to_numeric(target)
             is_numeric = True
         except Exception:
             is_numeric = False
-        # If any value is non numeric - multiclass
+        # If target cannot be converted to numeric, assume multiclass classification
         if not is_numeric:
             task = ModelTaskType.MULTICLASS
+            reason = bundle.get("non_numeric_multiclass_reason")
         else:
+            # Multiclass classification: few unique values and integer encoding
             if target.nunique() <= 50 and is_int_encoding(target.unique()):
                 task = ModelTaskType.MULTICLASS
+                reason = bundle.get("few_unique_label_multiclass_reason")
+            # Regression case: if there is date, assume regression
             elif has_date:
                 task = ModelTaskType.REGRESSION
+                reason = bundle.get("date_search_key_regression_reason")
             else:
+                # Remove zero values and recalculate unique ratio
                 non_zero_target = target[target != 0]
                 target_items = non_zero_target.nunique()
                 target_ratio = target_items / len(non_zero_target)
+                # Use unique_ratio to determine whether to classify as regression or multiclass
                 if (
-                    (target.dtype.kind == "f" and np.any(target != target.astype(int)))  # any non integer
+                    (target.dtype.kind == "f" and np.any(target != target.astype(int)))  # Non-integer float values
                     or target_items > 50
-                    or target_ratio > 0.2
+                    or target_ratio > 0.2  # If non-zero values have high ratio of uniqueness
                 ):
                     task = ModelTaskType.REGRESSION
+                    reason = bundle.get("many_unique_label_regression_reason")
                 else:
                     task = ModelTaskType.MULTICLASS
+                    reason = bundle.get("limited_int_multiclass_reason")
-    logger.info(f"Detected task type: {task}")
+    # Log or print the reason for the selected task type
+    logger.info(f"Detected task type: {task} (Reason: {reason})")
+    # Print task type and reason if silent mode is off
     if not silent:
-        print(bundle.get("target_type_detected").format(task))
+        print(bundle.get("target_type_detected").format(task, reason))
     return task
@@ -81,8 +115,8 @@ def balance_undersample(
     target_column: str,
     task_type: ModelTaskType,
     random_state: int,
-    imbalance_threshold: int = 0.2,
-    min_sample_threshold: int = 5000,
+    binary_min_sample_threshold: int = 5000,
+    multiclass_min_sample_threshold: int = 25000,
     binary_bootstrap_loops: int = 5,
     multiclass_bootstrap_loops: int = 2,
     logger: Optional[logging.Logger] = None,
@@ -96,52 +130,60 @@ def balance_undersample(
     if SYSTEM_RECORD_ID not in df.columns:
         raise Exception("System record id must be presented for undersampling")
-    count = len(df)
+    # count = len(df)
     target = df[target_column].copy()
-    target_classes_count = target.nunique()
+    # target_classes_count = target.nunique()
     vc = target.value_counts()
     max_class_value = vc.index[0]
     min_class_value = vc.index[len(vc) - 1]
     max_class_count = vc[max_class_value]
     min_class_count = vc[min_class_value]
+    num_classes = len(vc)
-    min_class_percent = imbalance_threshold / target_classes_count
-    min_class_threshold = int(min_class_percent * count)
+    # min_class_percent = imbalance_threshold / target_classes_count
+    # min_class_threshold = int(min_class_percent * count)
     resampled_data = df
     df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
     if task_type == ModelTaskType.MULTICLASS:
-        # Sort classes by rows count and find 25% quantile class
-        classes = vc.index
-        quantile25_idx = int(0.75 * len(classes)) - 1
-        quantile25_class = classes[quantile25_idx]
-        quantile25_class_cnt = vc[quantile25_class]
-        if max_class_count > (quantile25_class_cnt * multiclass_bootstrap_loops):
-            msg = bundle.get("imbalance_multiclass").format(quantile25_class, quantile25_class_cnt)
+        if len(df) > multiclass_min_sample_threshold and max_class_count > (
+            min_class_count * multiclass_bootstrap_loops
+        ):
+            # msg = bundle.get("imbalance_multiclass").format(min_class_value, min_class_count)
+            msg = bundle.get("imbalanced_target").format(min_class_value, min_class_count)
             logger.warning(msg)
             print(msg)
             if warning_counter:
                 warning_counter.increment()
-            # 25% and lower classes will stay as is. Higher classes will be downsampled
             sample_strategy = dict()
-            for class_idx in range(quantile25_idx):
-                # compare class count with count_of_quantile25_class * 2
-                class_value = classes[class_idx]
+            for class_value in vc.index:
+                if class_value == min_class_value:
+                    continue
                 class_count = vc[class_value]
-                sample_strategy[class_value] = min(class_count, quantile25_class_cnt * multiclass_bootstrap_loops)
+                sample_size = min(
+                    class_count,
+                    multiclass_bootstrap_loops
+                    * (
+                        min_class_count
+                        + max((multiclass_min_sample_threshold - num_classes * min_class_count) / (num_classes - 1), 0)
+                    ),
+                )
+                sample_strategy[class_value] = int(sample_size)
+            logger.info(f"Rebalance sample strategy: {sample_strategy}. Min class count: {min_class_count}")
             sampler = RandomUnderSampler(sampling_strategy=sample_strategy, random_state=random_state)
             X = df[SYSTEM_RECORD_ID]
             X = X.to_frame(SYSTEM_RECORD_ID)
             new_x, _ = sampler.fit_resample(X, target)  # type: ignore
             resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
-    elif len(df) > min_sample_threshold and min_class_count < min_sample_threshold / 2:
-        msg = bundle.get("dataset_rarest_class_less_threshold").format(
-            min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
-        )
+    elif len(df) > binary_min_sample_threshold:
+        # msg = bundle.get("dataset_rarest_class_less_threshold").format(
+        #     min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
+        # )
+        msg = bundle.get("imbalanced_target").format(min_class_value, min_class_count)
         logger.warning(msg)
         print(msg)
         if warning_counter:
@@ -150,48 +192,62 @@ def balance_undersample(
         # fill up to min_sample_threshold by majority class
         minority_class = df[df[target_column] == min_class_value]
         majority_class = df[df[target_column] != min_class_value]
-        sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
+        # sample_size = min(len(majority_class), min_sample_threshold - min_class_count)
+        sample_size = min(
+            max_class_count,
+            binary_bootstrap_loops * (min_class_count + max(binary_min_sample_threshold - 2 * min_class_count, 0)),
+        )
+        logger.info(
+            f"Min class count: {min_class_count}. Max class count: {max_class_count}."
+            f" Rebalance sample size: {sample_size}"
+        )
         sampled_majority_class = majority_class.sample(n=sample_size, random_state=random_state)
         resampled_data = df[
             (df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
             | (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
         ]
-    elif max_class_count > min_class_count * binary_bootstrap_loops:
-        msg = bundle.get("dataset_rarest_class_less_threshold").format(
-            min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
-        )
-        logger.warning(msg)
-        print(msg)
-        if warning_counter:
-            warning_counter.increment()
+    # elif max_class_count > min_class_count * binary_bootstrap_loops:
+    #     msg = bundle.get("dataset_rarest_class_less_threshold").format(
+    #         min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
+    #     )
+    #     logger.warning(msg)
+    #     print(msg)
+    #     if warning_counter:
+    #         warning_counter.increment()
-        sampler = RandomUnderSampler(
-            sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
-        )
-        X = df[SYSTEM_RECORD_ID]
-        X = X.to_frame(SYSTEM_RECORD_ID)
-        new_x, _ = sampler.fit_resample(X, target)  # type: ignore
+    #     sampler = RandomUnderSampler(
+    #         sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
+    #     )
+    #     X = df[SYSTEM_RECORD_ID]
+    #     X = X.to_frame(SYSTEM_RECORD_ID)
+    #     new_x, _ = sampler.fit_resample(X, target)  # type: ignore
-        resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
+    #     resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
     logger.info(f"Shape after rebalance resampling: {resampled_data}")
     return resampled_data
-def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
-    df = pd.concat([expected, actual])
+def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Exception]:
+    try:
+        df = pd.concat([expected, actual])
+        if is_bool_dtype(df):
+            df = np.where(df, 1, 0)
-    # Define the bins for the target variable
-    df_min = df.min()
-    df_max = df.max()
-    bins = [df_min, (df_min + df_max) / 2, df_max]
+        # Define the bins for the target variable
+        df_min = df.min()
+        df_max = df.max()
+        bins = [df_min, (df_min + df_max) / 2, df_max]
-    # Calculate the base distribution
-    train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
+        # Calculate the base distribution
+        train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
-    # Calculate the target distribution
-    test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
+        # Calculate the target distribution
+        test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
-    # Calculate the PSI
-    return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
+        # Calculate the PSI
+        return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
+    except Exception as e:
+        return e

upgini/utils/warning_counter.py CHANGED Viewed

@@ -4,6 +4,7 @@ class WarningCounter:
     def increment(self):
         self._count += 1
+        return self._count
     def reset(self):
         self._count = 0

upgini/version_validator.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import threading
+from typing import Callable, Optional
 import requests
@@ -30,15 +31,18 @@ def get_version(package, url_pattern=URL_PATTERN):
     return version
-def validate_version(logger: logging.Logger):
+def validate_version(logger: logging.Logger, warning_function: Optional[Callable[[str], None]] = None):
     def task():
         try:
             current_version = parse(__version__)
             latest_version = get_version("upgini")
-            if current_version < latest_version:  # type: ignore
+            if current_version < latest_version:
                 msg = bundle.get("version_warning").format(current_version, latest_version)
-                logger.warning(msg)
-                print(msg)
+                if warning_function:
+                    warning_function(msg)
+                else:
+                    logger.warning(msg)
+                    print(msg)
         except Exception:
             logger.warning("Failed to validate version", exc_info=True)

{upgini-1.1.280.dev0.dist-info → upgini-1.2.31.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.1.280.dev0
+Version: 1.2.31
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
@@ -22,15 +22,17 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Scientific/Engineering :: Information Analysis
-Requires-Python: <3.11,>=3.8
+Requires-Python: <3.12,>=3.8
 Requires-Dist: catboost>=1.0.3
 Requires-Dist: fastparquet>=0.8.1
 Requires-Dist: ipywidgets>=8.1.0
-Requires-Dist: lightgbm>=3.3.2
-Requires-Dist: numpy>=1.19.0
+Requires-Dist: jarowinkler>=2.0.0
+Requires-Dist: levenshtein>=0.25.1
+Requires-Dist: numpy<=1.26.4,>=1.19.0
 Requires-Dist: pandas<3.0.0,>=1.1.0
-Requires-Dist: pydantic<2.0.0,>=1.8.2
+Requires-Dist: pydantic<3.0.0,>1.0.0
 Requires-Dist: pyjwt>=2.8.0
+Requires-Dist: python-bidi==0.4.2
 Requires-Dist: python-dateutil>=2.8.0
 Requires-Dist: python-json-logger>=2.0.2
 Requires-Dist: requests>=2.8.0
@@ -130,7 +132,7 @@ Description-Content-Type: text/markdown
 |Consumer Confidence index| 44 |22|-|Monthly|date, country|No
 |World economic indicators|191 |41|-|Monthly|date, country|No
 |Markets data|-|17|-|Monthly|date, datetime|No
-|World mobile & fixed broadband network coverage and perfomance |167|-|3|Monthly|country, postal/ZIP code|No
+|World mobile & fixed broadband network coverage and performance |167|-|3|Monthly|country, postal/ZIP code|No
 |World demographic data |90|-|2|Annual|country, postal/ZIP code|No
 |World house prices |44|-|3|Annual|country, postal/ZIP code|No
 |Public social media profile data |104|-|-|Monthly|date, email/HEM, phone |Yes
@@ -143,7 +145,7 @@ Description-Content-Type: text/markdown
 ## 💼 Tutorials
-###  [Search of relevant external features & Automated feature generation for Salary predicton task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
+###  [Search of relevant external features & Automated feature generation for Salary prediction task (use as a template)](https://github.com/upgini/upgini/blob/main/notebooks/Upgini_Features_search%26generation.ipynb)
 * The goal is to predict salary for data science job postning based on information about employer and job description.
 * Following this guide, you'll learn how to **search & auto generate new relevant features with Upgini library**
@@ -257,7 +259,9 @@ We do dataset verification and cleaning under the hood, but still there are some
 *Search keys* columns will be used to match records from all potential external data sources / features.
 Define one or multiple columns as a search keys with `FeaturesEnricher` class initialization.
 ```python
-from upgini import FeaturesEnricher, SearchKey
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey
 enricher = FeaturesEnricher(
 	search_keys={
 		"subscription_activation_date": SearchKey.DATE,
@@ -343,7 +347,9 @@ enricher = FeaturesEnricher(
 For the meaning types <tt>SearchKey.DATE</tt>/<tt>SearchKey.DATETIME</tt> with dtypes <tt>object</tt> or <tt>string</tt> you have to clarify date/datetime format by passing <tt>date_format</tt> parameter to `FeaturesEnricher`. For example:
 ```python
-from upgini import FeaturesEnricher, SearchKey
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey
 enricher = FeaturesEnricher(
 	search_keys={
 		"subscription_activation_date": SearchKey.DATE,
@@ -364,7 +370,9 @@ df["date"] = df.date.astype("datetime64").dt.tz_localize("Europe/Warsaw")
 Single country for the whole training dataset can be passed with `country_code` parameter:
 ```python
-from upgini import FeaturesEnricher, SearchKey
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey
 enricher = FeaturesEnricher(
 	search_keys={
 		"subscription_activation_date": SearchKey.DATE,
@@ -383,7 +391,8 @@ Create instance of the `FeaturesEnricher` class and call:
 Let's try it out!
 ```python
 import pandas as pd
-from upgini import FeaturesEnricher, SearchKey
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey
 # load labeled training dataset to initiate search
 train_df = pd.read_csv("customer_churn_prediction_train.csv")
@@ -474,7 +483,9 @@ We detect ML task under the hood based on label column values. Currently we supp
 But for certain search datasets you can pass parameter to `FeaturesEnricher` with correct ML taks type:
 ```python
-from upgini import ModelTaskType
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey, ModelTaskType
 enricher = FeaturesEnricher(
 	search_keys={"subscription_activation_date": SearchKey.DATE},
 	model_task_type=ModelTaskType.REGRESSION
@@ -487,7 +498,9 @@ enricher = FeaturesEnricher(
 To initiate feature search you can pass cross-validation type parameter to `FeaturesEnricher` with time series specific CV type:
 ```python
-from upgini.metadata import CVType
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey, CVType
 enricher = FeaturesEnricher(
 	search_keys={"sales_date": SearchKey.DATE},
 	cv=CVType.time_series
@@ -621,7 +634,9 @@ But you can easily define new split by passing child of BaseCrossValidator to pa
 Example with more tips-and-tricks:
 ```python
-from upgini import FeaturesEnricher, SearchKey
+from upgini.features_enricher import FeaturesEnricher
+from upgini.metadata import SearchKey
 enricher = FeaturesEnricher(search_keys={"registration_date": SearchKey.DATE})
 # Fit with default setup for metrics calculation
@@ -794,7 +809,7 @@ You may publish ANY data which you consider as royalty / license free ([Open Dat
 2. Copy *Upgini API key* from profile and upload your data from Upgini python library with this key:
 ```python
 import pandas as pd
-from upgini import SearchKey
+from upgini.metadata import SearchKey
 from upgini.ads import upload_user_ads
 import os
 os.environ["UPGINI_API_KEY"] = "your_long_string_api_key_goes_here"
@@ -839,4 +854,4 @@ Some convenient ways to start contributing are:
 - [More perks for registered users](https://profile.upgini.com)
 <sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
-Please report it here.</a></sup>
+Please report it here</a></sup>

upgini-1.2.31.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,65 @@
+upgini/__about__.py,sha256=ZMRxZM_8KClqm4X0jGVzsRbSK2eN35eEoOdQFqr5IU0,23
+upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
+upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
+upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
+upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
+upgini/features_enricher.py,sha256=lNfu5Z40NmkkGJScKAwe_0VBtL8liePifuAlKE_flfA,192053
+upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
+upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
+upgini/metadata.py,sha256=lUa2xYhBhnCeTqNt6lWc9iP_YuikYGIsDSn8Vwyjv1I,11235
+upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
+upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
+upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
+upgini/version_validator.py,sha256=h1GViOWzULy5vf6M4dpTJuIk-4V38UCrTY1sb9yLa5I,1594
+upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
+upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
+upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+upgini/autofe/all_operands.py,sha256=cCCB44qvkmuWyiRM5Xykx8tkHPIjQthrWyj67STWN80,2578
+upgini/autofe/binary.py,sha256=zMhtHVuGUAFLUqem-XiXqJj-GRXxS88tdz8tFuDfSNM,7659
+upgini/autofe/date.py,sha256=OpFc3Al0xO3qlESn2Uokfxw51ArVqmh3xngWwdrsaqE,9762
+upgini/autofe/feature.py,sha256=eL7wABUhDKZzv3E-RPJNcyGwSfB0UptcfU2RbvsOks4,15082
+upgini/autofe/groupby.py,sha256=r-xl_keZZgm_tpiEoDhjYSkT6NHv7a4cRQR4wJ4uCp8,3263
+upgini/autofe/operand.py,sha256=uk883RaNqgXqtkaRqA1re1d9OFnnpv0JVvelYx09Yw0,2943
+upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
+upgini/autofe/vector.py,sha256=ehcZUDqV71TfbU8EmKfdYp603gS2dJY_-fpr10ho5sI,663
+upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
+upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
+upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
+upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
+upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
+upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
+upgini/resource_bundle/strings.properties,sha256=bKw_rjZZTomLJhQBqiM7_P2EoRq45_Ng2gP4WE6MRBE,26921
+upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
+upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
+upgini/sampler/random_under_sampler.py,sha256=TIbm7ATo-bCMF-IiS5sZeDC1ad1SYg0eY_rRmg84yIQ,4024
+upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
+upgini/utils/Roboto-Regular.ttf,sha256=kqYnZjMRQMpbyLulIChCLSdgYa1XF8GsUIoRi2Gcauw,168260
+upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
+upgini/utils/base_search_key_detector.py,sha256=Inc6iGG-VXQdejWFfbekIkZk2ahC4k7CdGqzOkie6Bs,1021
+upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl1UOB4s,3382
+upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
+upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
+upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
+upgini/utils/datetime_utils.py,sha256=F61i2vZCB6eUy4WwodDyPi50XKPbhOHsxDrU6tGa6CM,13133
+upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
+upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
+upgini/utils/email_utils.py,sha256=GbnhHJn1nhUBytmK6PophYqaoq4t7Lp6i0-O0Gd3RV8,5265
+upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
+upgini/utils/feature_info.py,sha256=Tp_2g5-rCjY4NpzKhzxwNxuqH5FFL8vG94OU5kH6wzk,6702
+upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
+upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
+upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
+upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
+upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
+upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
+upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
+upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
+upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
+upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
+upgini-1.2.31.dist-info/METADATA,sha256=_OJUvR8p-0uuVdltUq34yo_W5OZZvKOlID5OHlYY9Do,48578
+upgini-1.2.31.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+upgini-1.2.31.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.31.dist-info/RECORD,,

upgini 1.1.280.dev0__py3-none-any.whl → 1.2.31__py3-none-any.whl

Potentially problematic release.

upgini 1.1.280.dev0__py3-none-any.whl → 1.2.31__py3-none-any.whl