upgini 1.1.242a3__tar.gz → 1.1.244a1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of upgini might be problematic.
- {upgini-1.1.242a3/src/upgini.egg-info → upgini-1.1.244a1}/PKG-INFO +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/setup.py +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/ads_management/ads_manager.py +0 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/feature.py +12 -5
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/data_source/data_source_publisher.py +1 -6
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/dataset.py +20 -4
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/errors.py +0 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/features_enricher.py +31 -38
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/http.py +24 -14
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/mdc/__init__.py +1 -2
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/mdc/context.py +1 -5
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/normalizer/phone_normalizer.py +3 -4
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/resource_bundle/exceptions.py +0 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/resource_bundle/strings.properties +1 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/sampler/base.py +3 -9
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/sampler/random_under_sampler.py +1 -3
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/search_task.py +4 -10
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/spinner.py +1 -7
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/country_utils.py +3 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/datetime_utils.py +16 -3
- upgini-1.1.244a1/src/upgini/utils/deduplicate_utils.py +82 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/email_utils.py +0 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/fallback_progress_bar.py +5 -8
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/warning_counter.py +0 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini.egg-info/SOURCES.txt +1 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_datetime_utils.py +36 -30
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_email_utils.py +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_etalon_validation.py +13 -12
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_widget.py +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/LICENSE +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/README.md +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/pyproject.toml +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/setup.cfg +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/ads.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/metadata.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/metrics.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/ip_utils.py +1 -1
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_country_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_metrics.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.242a3 → upgini-1.1.244a1}/tests/test_postal_code_utils.py +0 -0
src/upgini/autofe/feature.py

@@ -53,9 +53,15 @@ class Column:


 class Feature:
-    def __init__(
-        …
+    def __init__(
+        self,
+        op: Operand,
+        children: List[Union[Column, "Feature"]],
+        data: Optional[pd.DataFrame] = None,
+        display_index: Optional[str] = None,
+        cached_display_name: Optional[str] = None,
+        alias: Optional[str] = None,
+    ):
         self.op = op
         self.children = children
         self.data = data

@@ -258,8 +264,9 @@ class Feature:


 class FeatureGroup:
-    def __init__(
-        …
+    def __init__(
+        self, op: Operand, main_column: Optional[Union[Column, Feature]], children: List[Union[Column, Feature]]
+    ):
         self.op = op
         self.main_column_node = main_column
         self.children = children
src/upgini/data_source/data_source_publisher.py

@@ -31,7 +31,6 @@ class OnlineUploadingType(Enum):


 class DataSourcePublisher:
-
     FINAL_STATUSES = ["COMPLETED", "FAILED", "TIMED_OUT"]
     DEFAULT_GENERATE_EMBEDDINGS = []


@@ -259,11 +258,7 @@ class DataSourcePublisher:
         except Exception:
             self.logger.exception(f"Failed to deactivate data tables {data_table_ids} for clients {client_emails}")

-    def upload_online(
-        self,
-        bq_table_id: Optional[str] = None,
-        search_keys: Optional[List[SearchKey]] = None
-    ):
+    def upload_online(self, bq_table_id: Optional[str] = None, search_keys: Optional[List[SearchKey]] = None):
         trace_id = str(uuid.uuid4())
         with MDC(trace_id=trace_id):
             if bq_table_id is None and search_keys is None:
src/upgini/dataset.py

@@ -36,12 +36,14 @@ from upgini.metadata import (
     NumericInterval,
     RuntimeParameters,
     SearchCustomization,
+    SearchKey,
 )
 from upgini.normalizer.phone_normalizer import PhoneNormalizer
 from upgini.resource_bundle import bundle
 from upgini.sampler.random_under_sampler import RandomUnderSampler
 from upgini.search_task import SearchTask
 from upgini.utils import combine_search_keys
+from upgini.utils.deduplicate_utils import remove_fintech_duplicates
 from upgini.utils.email_utils import EmailSearchKeyConverter

 try:
@@ -346,9 +348,11 @@ class Dataset:  # (pd.DataFrame):

         ipv6 = ip + "_v6"
         self.data[ipv6] = (
-            self.data[ip]
-            …
+            self.data[ip]
+            .apply(self._to_ipv6)
+            .apply(self.__ip_to_int)
+            .astype("string")
+            .str.replace(".0", "", regex=False)
         )
         self.data = self.data.drop(columns=ip)
         self.meaning_types[ipv6] = FileColumnMeaningType.IPV6_ADDRESS
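The rewritten chain makes the IPv6 normalization explicit: convert to an IPv6 representation, take the integer form, cast to pandas' string dtype, and strip the float-style ".0" tail that can appear along the way. A rough standalone equivalent (the to_ipv6 helper here is an assumption; upgini's private _to_ipv6 and __ip_to_int may differ in detail):

    import ipaddress
    import pandas as pd

    def to_ipv6(ip: str) -> ipaddress.IPv6Address:
        # Assumed behaviour: lift IPv4 into the IPv4-mapped IPv6 space, keep IPv6 as-is.
        addr = ipaddress.ip_address(ip)
        return ipaddress.IPv6Address(f"::ffff:{addr}") if addr.version == 4 else addr

    df = pd.DataFrame({"ip": ["192.168.1.1", "::cf:befe:525b"]})
    df["ip_v6"] = (
        df["ip"]
        .apply(to_ipv6)
        .apply(int)                          # integer form of the address
        .astype("string")
        .str.replace(".0", "", regex=False)  # drop a float-style ".0" tail if present
    )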
@@ -811,7 +815,19 @@ class Dataset:  # (pd.DataFrame):

         self.__convert_features_types()

-
+        search_keys = {
+            col: SearchKey.from_meaning_type(key_type)
+            for col, key_type in self.meaning_types.items()
+            if SearchKey.from_meaning_type(key_type) is not None
+        }
+
+        if validate_target:
+            need_full_deduplication, self.data = remove_fintech_duplicates(self.data, search_keys, self.logger)
+        else:
+            need_full_deduplication = True
+
+        if need_full_deduplication:
+            self.__clean_duplicates(silent_mode)

         self.__validate_dataset(validate_target, silent_mode)

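The return contract drives the gating above: remove_fintech_duplicates hands back a (need_full_deduplication, df) pair, and True means the frame did not qualify for the fintech-specific treatment (non-binary target, no date key, no personal keys, or mostly distinct dates per person), so the generic __clean_duplicates pass must still run. A minimal caller, with illustrative column names:

    from upgini.metadata import SearchKey
    from upgini.utils.deduplicate_utils import remove_fintech_duplicates

    search_keys = {"phone": SearchKey.PHONE, "app_date": SearchKey.DATE}
    need_full_dedup, data = remove_fintech_duplicates(data, search_keys, logger)
    if need_full_dedup:
        data = data.drop_duplicates()  # stand-in for the private __clean_duplicates pass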
src/upgini/features_enricher.py

@@ -64,6 +64,7 @@ from upgini.utils.datetime_utils import (
     is_blocked_time_series,
     is_time_series,
 )
+from upgini.utils.deduplicate_utils import remove_fintech_duplicates
 from upgini.utils.display_utils import (
     display_html_dataframe,
     do_without_pandas_limits,

@@ -297,8 +298,9 @@ class FeaturesEnricher(TransformerMixin):
     def _set_api_key(self, api_key: str):
         self._api_key = api_key
         if self.logs_enabled:
-            self.logger = LoggerFactory().get_logger(
-                …
+            self.logger = LoggerFactory().get_logger(
+                self.endpoint, self._api_key, self.client_ip, self.client_visitorid
+            )

     api_key = property(_get_api_key, _set_api_key)


@@ -856,7 +858,7 @@ class FeaturesEnricher(TransformerMixin):

         if X is not None and y is None:
             raise ValidationError("X passed without y")
-
+
         effective_X = X if X is not None else self.X
         effective_eval_set = eval_set if eval_set is not None else self.eval_set


@@ -1200,8 +1202,8 @@ class FeaturesEnricher(TransformerMixin):
         converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger)
         extended_X = converter.convert(extended_X, keep_time=True)
         generated_features.extend(converter.generated_features)
-        email_column = self.…
-        hem_column = self.…
+        email_column = self._get_email_column(search_keys)
+        hem_column = self._get_hem_column(search_keys)
         if email_column:
             converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)
             extended_X = converter.convert(extended_X)

@@ -1469,7 +1471,7 @@ class FeaturesEnricher(TransformerMixin):

         original_df_sampled = self.df_with_original_index[
             self.df_with_original_index[SYSTEM_RECORD_ID].isin(fit_features[SYSTEM_RECORD_ID])
-        …
+        ]
         enriched_X = drop_existing_columns(enriched_Xy, TARGET)
         if EVAL_SET_INDEX in original_df_sampled.columns:
             Xy_sampled = original_df_sampled.query(f"{EVAL_SET_INDEX} == 0")

@@ -1525,6 +1527,10 @@ class FeaturesEnricher(TransformerMixin):
                 eval_df_with_index[EVAL_SET_INDEX] = idx + 1
                 df_with_eval_set_index = pd.concat([df_with_eval_set_index, eval_df_with_index])

+            _, df_with_eval_set_index = remove_fintech_duplicates(
+                df_with_eval_set_index, self.search_keys, self.logger, silent=True
+            )
+
             # downsample if need to eval_set threshold
             num_samples = _num_samples(df_with_eval_set_index)
             if num_samples > Dataset.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD:

@@ -1534,9 +1540,7 @@ class FeaturesEnricher(TransformerMixin):
                 )

                 X_sampled = (
-                    df_with_eval_set_index.query(f"{EVAL_SET_INDEX} == 0")
-                    .copy()
-                    .drop(columns=[EVAL_SET_INDEX, TARGET])
+                    df_with_eval_set_index.query(f"{EVAL_SET_INDEX} == 0").copy().drop(columns=[EVAL_SET_INDEX, TARGET])
                 )
                 X_sampled, search_keys = self._extend_x(X_sampled, is_demo_dataset)
                 y_sampled = df_with_eval_set_index.query(f"{EVAL_SET_INDEX} == 0").copy()[TARGET]

@@ -1760,8 +1764,8 @@ class FeaturesEnricher(TransformerMixin):
             generated_features.extend(converter.generated_features)
         else:
             self.logger.info("Input dataset hasn't date column")
-        email_column = self.…
-        hem_column = self.…
+        email_column = self._get_email_column(search_keys)
+        hem_column = self._get_hem_column(search_keys)
         email_converted_to_hem = False
         if email_column:
             converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, self.logger)

@@ -1883,9 +1887,7 @@ class FeaturesEnricher(TransformerMixin):
                 progress = self.get_progress(trace_id, validation_task)
             except KeyboardInterrupt as e:
                 print(bundle.get("search_stopping"))
-                self.rest_client.stop_search_task_v2(
-                    trace_id, validation_task.search_task_id
-                )
+                self.rest_client.stop_search_task_v2(trace_id, validation_task.search_task_id)
                 self.logger.warning(f"Search {validation_task.search_task_id} stopped by user")
                 print(bundle.get("search_stopped"))
                 raise e

@@ -2098,8 +2100,8 @@ class FeaturesEnricher(TransformerMixin):
             self.fit_generated_features.extend(converter.generated_features)
         else:
             self.logger.info("Input dataset hasn't date column")
-        email_column = self.…
-        hem_column = self.…
+        email_column = self._get_email_column(self.fit_search_keys)
+        hem_column = self._get_hem_column(self.fit_search_keys)
         email_converted_to_hem = False
         if email_column:
             converter = EmailSearchKeyConverter(email_column, hem_column, self.fit_search_keys, self.logger)

@@ -2481,21 +2483,6 @@ class FeaturesEnricher(TransformerMixin):
                 raise ValidationError(bundle.get("y_is_constant_eval_set"))

         return validated_eval_X, validated_eval_y
-
-    def _validate_baseline_score(self, X: pd.DataFrame, eval_set: Optional[List[Tuple]]):
-        if self.baseline_score_column is not None:
-            if self.baseline_score_column not in X.columns:
-                raise ValidationError(bundle.get("baseline_score_column_not_exists").format(self.baseline_score_column))
-            if X[self.baseline_score_column].isna().any():
-                raise ValidationError(bundle.get("baseline_score_column_has_na"))
-            if eval_set is not None:
-                if isinstance(eval_set, tuple):
-                    eval_set = [eval_set]
-                for eval in eval_set:
-                    if self.baseline_score_column not in eval[0].columns:
-                        raise ValidationError(bundle.get("baseline_score_column_not_exists"))
-                    if eval[0][self.baseline_score_column].isna().any():
-                        raise ValidationError(bundle.get("baseline_score_column_has_na"))

     def _validate_baseline_score(self, X: pd.DataFrame, eval_set: Optional[List[Tuple]]):
         if self.baseline_score_column is not None:

@@ -2660,17 +2647,23 @@ class FeaturesEnricher(TransformerMixin):
         return [col for col, t in search_keys.items() if t not in [SearchKey.DATE, SearchKey.DATETIME]]

     @staticmethod
-    def …
+    def _get_email_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
         for col, t in search_keys.items():
             if t == SearchKey.EMAIL:
                 return col

     @staticmethod
-    def …
+    def _get_hem_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
         for col, t in search_keys.items():
             if t == SearchKey.HEM:
                 return col

+    @staticmethod
+    def _get_phone_column(search_keys: Dict[str, SearchKey]) -> Optional[str]:
+        for col, t in search_keys.items():
+            if t == SearchKey.PHONE:
+                return col
+
     def __add_fit_system_record_id(
         self, df: pd.DataFrame, meaning_types: Dict[str, FileColumnMeaningType], search_keys: Dict[str, SearchKey]
     ) -> pd.DataFrame:

@@ -2785,9 +2778,9 @@ class FeaturesEnricher(TransformerMixin):
         result_features.index.name = original_index_name

         if rows_to_drop is not None:
-            …
+            self.logger.info(f"Before dropping target outliers size: {len(result_features)}")
             result_features = result_features[~result_features[SYSTEM_RECORD_ID].isin(rows_to_drop[SYSTEM_RECORD_ID])]
-            …
+            self.logger.info(f"After dropping target outliers size: {len(result_features)}")

         result_eval_sets = dict()
         if not is_transform and EVAL_SET_INDEX in result_features.columns:

@@ -2995,9 +2988,9 @@ class FeaturesEnricher(TransformerMixin):
                 self.logger.warning(f"Feature meta for display index {m.display_index} not found")
                 continue
             description["shap"] = feature_meta.shap_value
-            description["Sources"] = feature_meta.data_source
-            …
+            description["Sources"] = feature_meta.data_source.replace("AutoFE: features from ", "").replace(
+                "AutoFE: feature from ", ""
+            )
             description["Feature name"] = feature_meta.name

             feature_idx = 1
src/upgini/http.py

@@ -308,7 +308,6 @@ class _RestClient:
         # self.silent_mode = silent_mode
         self.client_ip = client_ip
         self.client_visitorid = client_visitorid
-        print(f"Created RestClient with {client_ip} and {client_visitorid}")
         self._access_token = self._refresh_access_token()
         # self._access_token: Optional[str] = None  # self._refresh_access_token()
         self.last_refresh_time = time.time()

@@ -442,9 +441,7 @@ class _RestClient:
     ) -> SearchTaskResponse:
         api_path = self.INITIAL_SEARCH_URI_FMT_V2

-        print(f"Start initial search with {self.client_ip} and {self.client_visitorid}")
         track_metrics = get_track_metrics(self.client_ip, self.client_visitorid)
-        print(f"Sending track metrics: {track_metrics}")

         def open_and_send():
             md5_hash = hashlib.md5()

@@ -486,7 +483,7 @@ class _RestClient:
                 api_path, files, trace_id=trace_id, additional_headers=additional_headers
             )

-        response = self._with_unauth_retry(…
+        response = self._with_unauth_retry(open_and_send)
         return SearchTaskResponse(response)

     def check_uploaded_file_v2(self, trace_id: str, file_upload_id: str, metadata: FileMetadata) -> bool:

@@ -571,7 +568,7 @@ class _RestClient:
                 api_path, files, trace_id=trace_id, additional_headers=additional_headers
             )

-        response = self._with_unauth_retry(…
+        response = self._with_unauth_retry(open_and_send)
         return SearchTaskResponse(response)

     def validation_search_without_upload_v2(

@@ -912,8 +909,12 @@ def resolve_api_token(api_token: Optional[str]) -> str:
     return DEMO_API_KEY


-def get_rest_client(
-    …
+def get_rest_client(
+    backend_url: Optional[str] = None,
+    api_token: Optional[str] = None,
+    client_ip: Optional[str] = None,
+    client_visitorid: Optional[str] = None,
+) -> _RestClient:
     url = _resolve_backend_url(backend_url)
     token = resolve_api_token(api_token)


@@ -925,15 +926,21 @@ def is_demo_api_key(api_token: Optional[str]) -> bool:


 @lru_cache()
-def _get_rest_client(
-    …
+def _get_rest_client(
+    backend_url: str, api_token: str, client_ip: Optional[str] = None, client_visitorid: Optional[str] = None
+) -> _RestClient:
     return _RestClient(backend_url, api_token, client_ip, client_visitorid)


 class BackendLogHandler(logging.Handler):
-    def __init__(
-        …
+    def __init__(
+        self,
+        rest_client: _RestClient,
+        client_ip: Optional[str] = None,
+        client_visitorid: Optional[str] = None,
+        *args,
+        **kwargs,
+    ) -> None:
         super().__init__(*args, **kwargs)
         self.rest_client = rest_client
         self.track_metrics = None
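Because _get_rest_client is wrapped in @lru_cache(), one _RestClient is created and reused per distinct (backend_url, api_token, client_ip, client_visitorid) tuple, so threading the two new client-identification arguments through changes the cache key. A minimal illustration of that caching behaviour (the factory body and URL are stand-ins):

    from functools import lru_cache

    @lru_cache()
    def _get_client(backend_url, api_token, client_ip=None, client_visitorid=None):
        return object()  # stand-in for _RestClient(backend_url, api_token, client_ip, client_visitorid)

    a = _get_client("https://example.backend", "token")
    b = _get_client("https://example.backend", "token")
    c = _get_client("https://example.backend", "token", client_ip="1.2.3.4")
    assert a is b       # identical arguments -> cached instance reused
    assert a is not c   # new client_ip -> new cache entry, fresh client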
@@ -987,8 +994,11 @@ class LoggerFactory:
             root.handlers.clear()

     def get_logger(
-        self,
-        …
+        self,
+        backend_url: Optional[str] = None,
+        api_token: Optional[str] = None,
+        client_ip: Optional[str] = None,
+        client_visitorid: Optional[str] = None,
     ) -> logging.Logger:
         url = _resolve_backend_url(backend_url)
         token = resolve_api_token(api_token)
src/upgini/mdc/__init__.py

@@ -3,8 +3,7 @@
 .. module: mdc
 .. moduleauthor:: Aljosha Friemann a.friemann@automate.wtf
 """
-from __future__ import …
-    unicode_literals)
+from __future__ import absolute_import, division, print_function, unicode_literals

 import logging

src/upgini/mdc/context.py

@@ -32,9 +32,7 @@ def get_mdc_fields():

 @contextmanager
 def new_log_context(**kwargs):
-    context_id = "mdc-{thread}-{context}".format(
-        thread=threading.current_thread().ident, context=uuid.uuid4()
-    )
+    context_id = "mdc-{thread}-{context}".format(thread=threading.current_thread().ident, context=uuid.uuid4())

     LOGGER.debug("creating context %s", context_id)


@@ -48,11 +46,9 @@ def new_log_context(**kwargs):
         setattr(context, key, value)

     try:
-
         yield context

     finally:
-
         LOGGER.debug("deleting context %s", context_id)

         try:
src/upgini/normalizer/phone_normalizer.py

@@ -7,7 +7,6 @@ from upgini.errors import ValidationError


 class PhoneNormalizer:
-
     def __init__(self, df: pd.DataFrame, phone_column_name: str, country_column_name: Optional[str] = None):
         self.df = df
         self.phone_column_name = phone_column_name

@@ -78,7 +77,7 @@ class PhoneNormalizer:
         try:
             value = str(value)
             if value.endswith(".0"):
-                value = value[:len(value) - 2]
+                value = value[: len(value) - 2]
             numeric_filter = filter(str.isdigit, value)
             numeric_string = "".join(numeric_filter)
             return PhoneNormalizer.validate_length(int(numeric_string))
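The slice change above is purely stylistic; the surrounding logic first strips a float-style ".0" suffix (phones often arrive as floats after a pandas read) and then keeps only the digits before validating length. A compact standalone equivalent:

    def digits_only(value) -> str:
        value = str(value)
        if value.endswith(".0"):
            value = value[: len(value) - 2]
        return "".join(filter(str.isdigit, value))

    assert digits_only(79991234567.0) == "79991234567"
    assert digits_only("+7 (999) 123-45-67") == "79991234567"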
@@ -337,5 +336,5 @@ class PhoneNormalizer:
     "PF": ("689", 7),
     "TK": ("690", 7),
     "FM": ("691", 7),
-    "MH": ("692", 7)
-    …
+    "MH": ("692", 7),
+}
src/upgini/resource_bundle/strings.properties

@@ -144,6 +144,7 @@ dataset_empty_column_names=Some column names are empty. Add names please
 dataset_too_long_column_name=Column {} is too long: {} characters. Remove this column or trim length to 50 characters
 dataset_full_duplicates=\nWARNING: {:.5f}% of the rows are fully duplicated
 dataset_diff_target_duplicates=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates with different y values. These rows will be deleted as incorrect\nIncorrect row indexes: {}
+dataset_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
 dataset_drop_old_dates=\nWARNING: We don't have data before '2000-01-01' and removed all earlier records from the search dataset
 dataset_all_dates_old=There is empty train dataset after removing data before '2000-01-01'
 dataset_invalid_target_type=Unexpected dtype of target for binary task type: {}. Expected int or bool
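The new dataset_diff_target_duplicates_fintech key carries three placeholders (a {:.4f} percentage, a row count, and an index list), matching the .format(...) call in the new deduplicate_utils.py further down. Lookup follows the existing resource-bundle pattern (values illustrative):

    from upgini.resource_bundle import bundle

    msg = bundle.get("dataset_diff_target_duplicates_fintech").format(
        0.1234,        # percentage of affected rows
        5,             # number of removed rows
        [10, 42, 77],  # removed row indexes
    )
    print(msg)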
src/upgini/sampler/base.py

@@ -47,9 +47,7 @@ class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
             Return the instance itself.
         """
         X, y, _ = self._check_X_y(X, y)
-        self.sampling_strategy_ = check_sampling_strategy(
-            self.sampling_strategy, y, self._sampling_type
-        )
+        self.sampling_strategy_ = check_sampling_strategy(self.sampling_strategy, y, self._sampling_type)
         return self

     def fit_resample(self, X, y):

@@ -77,15 +75,11 @@ class SamplerMixin(BaseEstimator, metaclass=ABCMeta):
         arrays_transformer = ArraysTransformer(X, y)
         X, y, binarize_y = self._check_X_y(X, y)

-        self.sampling_strategy_ = check_sampling_strategy(
-            self.sampling_strategy, y, self._sampling_type
-        )
+        self.sampling_strategy_ = check_sampling_strategy(self.sampling_strategy, y, self._sampling_type)

         output = self._fit_resample(X, y)

-        y_ = (
-            label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
-        )
+        y_ = label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]

         X_, y_ = arrays_transformer.transform(output[0], y_)
         return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
src/upgini/sampler/random_under_sampler.py

@@ -76,9 +76,7 @@ RandomUnderSampler # doctest: +NORMALIZE_WHITESPACE
     """

     @_deprecate_positional_args
-    def __init__(
-        self, *, sampling_strategy="auto", random_state=None, replacement=False
-    ):
+    def __init__(self, *, sampling_strategy="auto", random_state=None, replacement=False):
         super().__init__(sampling_strategy=sampling_strategy)
         self.random_state = random_state
         self.replacement = replacement
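The collapsed signature is cosmetic; the vendored sampler keeps its imbalanced-learn style API, where fit_resample downsamples the majority class to the minority size under the default sampling_strategy="auto". A typical call on a toy binary target:

    import numpy as np
    from upgini.sampler.random_under_sampler import RandomUnderSampler

    X = np.arange(20).reshape(10, 2)
    y = np.array([0] * 8 + [1] * 2)            # 8 vs 2: imbalanced binary target
    sampler = RandomUnderSampler(random_state=42)
    X_res, y_res = sampler.fit_resample(X, y)  # both classes end up with 2 samples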
src/upgini/search_task.py

@@ -79,16 +79,12 @@ class SearchTask:
         with Spinner():
             if self.PROTECT_FROM_RATE_LIMIT:
                 time.sleep(1)  # this is neccesary to avoid requests rate limit restrictions
-            self.summary = self.rest_client.search_task_summary_v2(
-                trace_id, search_task_id
-            )
+            self.summary = self.rest_client.search_task_summary_v2(trace_id, search_task_id)
             while self.summary.status not in completed_statuses and (
                 not check_fit or "VALIDATION" not in self.summary.status
             ):
                 time.sleep(self.POLLING_DELAY_SECONDS)
-                self.summary = self.rest_client.search_task_summary_v2(
-                    trace_id, search_task_id
-                )
+                self.summary = self.rest_client.search_task_summary_v2(trace_id, search_task_id)
             if self.summary.status in failed_statuses:
                 self.logger.error(f"Search {search_task_id} failed with status {self.summary.status}")
                 raise RuntimeError(bundle.get("search_task_failed_status"))
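Both collapsed calls live inside a poll-until-terminal loop: fetch the summary, sleep POLLING_DELAY_SECONDS, fetch again, and bail out with an error on a failed status. Reduced to its shape (a sketch; get_summary stands in for the rest-client call):

    import time

    def poll_until_done(get_summary, completed_statuses, failed_statuses, delay_seconds=5):
        summary = get_summary()
        while summary.status not in completed_statuses:
            time.sleep(delay_seconds)
            summary = get_summary()
        if summary.status in failed_statuses:
            raise RuntimeError(f"search failed with status {summary.status}")
        return summary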
@@ -130,9 +126,7 @@ class SearchTask:
         for provider_summary in self.summary.initial_important_providers:
             if provider_summary.status == "COMPLETED":
                 self.provider_metadata_v2.append(
-                    self.rest_client.get_provider_search_metadata_v3(
-                        provider_summary.ads_search_task_id, trace_id
-                    )
+                    self.rest_client.get_provider_search_metadata_v3(provider_summary.ads_search_task_id, trace_id)
                 )
                 if provider_summary.unused_features_for_generation is not None:
                     self.unused_features_for_generation.extend(provider_summary.unused_features_for_generation)

@@ -271,7 +265,7 @@ class SearchTask:
             self.rest_client._refresh_token,
             trace_id,
             self.search_task_id,
-            self.PROTECT_FROM_RATE_LIMIT
+            self.PROTECT_FROM_RATE_LIMIT,
         )

     def get_max_initial_eval_set_hit_rate_v2(self) -> Optional[Dict[int, float]]:
src/upgini/utils/country_utils.py

@@ -22,7 +22,9 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
             return df

         df[country_column] = (
-            df[country_column]…
+            df[country_column]
+            .astype("string")
+            .str.upper()
             .map(CountrySearchKeyDetector.COUNTRIES)
             .fillna(df[country_column])
         )
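The inserted .astype("string").str.upper() steps make the country lookup case-insensitive before the name-to-code mapping, while .fillna(...) leaves values without a mapping untouched. The same chain on a toy frame (the dict is an illustrative subset of the detector's COUNTRIES table):

    import pandas as pd

    COUNTRIES = {"UNITED KINGDOM": "GB", "GERMANY": "DE"}  # illustrative subset
    df = pd.DataFrame({"country": ["United Kingdom", "germany", "Narnia"]})
    df["country"] = (
        df["country"]
        .astype("string")
        .str.upper()
        .map(COUNTRIES)
        .fillna(df["country"])  # unmapped values keep their original spelling
    )
    # -> ["GB", "DE", "Narnia"]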
src/upgini/utils/datetime_utils.py

@@ -61,9 +61,22 @@ class DateTimeSearchKeyConverter:
         elif is_period_dtype(df[self.date_column]):
             df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
         elif is_numeric_dtype(df[self.date_column]):
-            …
+            # 315532801 - 2524608001 - seconds
+            # 315532801000 - 2524608001000 - milliseconds
+            # 315532801000000 - 2524608001000000 - microseconds
+            # 315532801000000000 - 2524608001000000000 - nanoseconds
+            if df[self.date_column].apply(lambda x: 10**16 < x).all():
+                df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ns")
+            elif df[self.date_column].apply(lambda x: 10**14 < x < 10**16).all():
+                df[self.date_column] = pd.to_datetime(df[self.date_column], unit="us")
+            elif df[self.date_column].apply(lambda x: 10**11 < x < 10**14).all():
+                df[self.date_column] = pd.to_datetime(df[self.date_column], unit="ms")
+            elif df[self.date_column].apply(lambda x: 0 < x < 10**11).all():
+                df[self.date_column] = pd.to_datetime(df[self.date_column], unit="s")
+            else:
+                msg = f"Unsupported type of date column {self.date_column}. Convert to datetime please."
+                self.logger.warning(msg)
+                raise ValidationError(msg)

         # If column with date is datetime then extract seconds of the day and minute of the hour
         # as additional features
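The new branch infers the epoch unit from the magnitude of the values; the comment block lists the second/millisecond/microsecond/nanosecond ranges that correspond to dates between 1980 and 2050. The same heuristic as a standalone sketch:

    import pandas as pd

    def to_datetime_guess_unit(values: pd.Series) -> pd.Series:
        # Magnitude-based unit detection, mirroring the branch above.
        if (values > 10**16).all():
            return pd.to_datetime(values, unit="ns")
        if values.between(10**14, 10**16, inclusive="neither").all():
            return pd.to_datetime(values, unit="us")
        if values.between(10**11, 10**14, inclusive="neither").all():
            return pd.to_datetime(values, unit="ms")
        if values.between(0, 10**11, inclusive="neither").all():
            return pd.to_datetime(values, unit="s")
        raise ValueError("unsupported numeric date column; convert to datetime first")

    print(to_datetime_guess_unit(pd.Series([1_600_000_000])))      # seconds -> 2020-09-13
    print(to_datetime_guess_unit(pd.Series([1_600_000_000_000])))  # milliseconds, same instant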
src/upgini/utils/deduplicate_utils.py (new file)

@@ -0,0 +1,82 @@
+from logging import Logger
+from typing import Dict, List, Optional, Tuple, Union
+
+import pandas as pd
+
+from upgini.metadata import TARGET, ModelTaskType, SearchKey
+from upgini.resource_bundle import bundle
+from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
+from upgini.utils.target_utils import define_task
+
+
+def remove_fintech_duplicates(
+    df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: Optional[Logger] = None, silent=False
+) -> Tuple[bool, pd.DataFrame]:
+    # Base checks
+    need_full_deduplication = True
+
+    if define_task(df[TARGET], silent=True) != ModelTaskType.BINARY:
+        return need_full_deduplication, df
+
+    date_col = _get_column_by_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
+    if date_col is None:
+        return need_full_deduplication, df
+
+    personal_cols = []
+    phone_col = _get_column_by_key(search_keys, SearchKey.PHONE)
+    if phone_col:
+        personal_cols.append(phone_col)
+    email_col = _get_column_by_key(search_keys, SearchKey.EMAIL)
+    if email_col:
+        personal_cols.append(email_col)
+    hem_col = _get_column_by_key(search_keys, SearchKey.HEM)
+    if hem_col:
+        personal_cols.append(hem_col)
+    if len(personal_cols) == 0:
+        return need_full_deduplication, df
+
+    grouped_by_personal_cols = df.groupby(personal_cols, group_keys=False)
+
+    uniques = grouped_by_personal_cols[date_col].nunique()
+    total = len(uniques)
+    diff_dates = len(uniques[uniques > 1])
+    if diff_dates / total >= 0.6:
+        return need_full_deduplication, df
+
+    # Additional checks
+
+    need_full_deduplication = False
+
+    duplicates = df.duplicated(personal_cols, keep=False)
+    duplicate_rows = df[duplicates]
+    if len(duplicate_rows) == 0:
+        return need_full_deduplication, df
+
+    if grouped_by_personal_cols[TARGET].apply(lambda x: len(x.unique()) == 1).all():
+        return need_full_deduplication, df
+
+    def has_diff_target_within_60_days(rows):
+        rows = rows.sort_values(by=date_col)
+        return len(rows[rows[TARGET].ne(rows[TARGET].shift()) & (rows[date_col].diff() < 60 * 24 * 60 * 60 * 1000)]) > 0
+
+    df = DateTimeSearchKeyConverter(date_col).convert(df)
+    grouped_by_personal_cols = df.groupby(personal_cols, group_keys=False)
+    rows_with_diff_target = grouped_by_personal_cols.filter(has_diff_target_within_60_days)
+    if len(rows_with_diff_target) > 0:
+        perc = len(rows_with_diff_target) * 100 / len(df)
+        msg = bundle.get("dataset_diff_target_duplicates_fintech").format(
+            perc, len(rows_with_diff_target), rows_with_diff_target.index.to_list()
+        )
+        if not silent:
+            print(msg)
+        if logger:
+            logger.warning(msg)
+        df = df[~df.index.isin(rows_with_diff_target.index)]
+
+    return need_full_deduplication, df
+
+
+def _get_column_by_key(search_keys: Dict[str, SearchKey], keys: Union[SearchKey, List[SearchKey]]) -> Optional[str]:
+    for col, key_type in search_keys.items():
+        if (isinstance(keys, list) and key_type in keys) or key_type == keys:
+            return col
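In short, the helper only prunes rows when the frame looks like a credit-scoring dataset: binary target, a date key, at least one personal key, and mostly repeated dates per person; within such groups it drops rows whose target flips within a 60-day window. A toy run (column names are illustrative; the "target" column name follows the TARGET constant):

    import pandas as pd
    from upgini.metadata import SearchKey
    from upgini.utils.deduplicate_utils import remove_fintech_duplicates

    df = pd.DataFrame({
        "phone": ["79991234567", "79991234567", "79997654321"],
        "app_date": ["2021-01-01", "2021-01-15", "2021-02-01"],
        "target": [0, 1, 0],  # same client, target flips within 60 days
    })
    search_keys = {"phone": SearchKey.PHONE, "app_date": SearchKey.DATE}
    need_full_dedup, cleaned = remove_fintech_duplicates(df, search_keys)
    # need_full_dedup is False; the two conflicting rows are expected to be dropped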
src/upgini/utils/fallback_progress_bar.py

@@ -2,8 +2,8 @@ from typing import Tuple


 class CustomFallbackProgressBar:
-    """Progressbar supports displaying a progressbar like element
-    …
+    """Progressbar supports displaying a progressbar like element"""
+
     def __init__(self, total=100):
         """Creates a new progressbar


@@ -20,12 +20,9 @@ class CustomFallbackProgressBar:

     def __repr__(self):
         fraction = self.progress / self.total
-        filled = …
-        rest = …
-        return …(
-            filled, rest,
-            self.progress, self._stage, self._eta
-        )
+        filled = "=" * int(fraction * self.text_width)
+        rest = " " * (self.text_width - len(filled))
+        return "[{}{}] {}% {} {}".format(filled, rest, self.progress, self._stage, self._eta)

     def display(self):
         print(self)
src/upgini.egg-info/SOURCES.txt

@@ -49,6 +49,7 @@ src/upgini/utils/country_utils.py
 src/upgini/utils/custom_loss_utils.py
 src/upgini/utils/cv_utils.py
 src/upgini/utils/datetime_utils.py
+src/upgini/utils/deduplicate_utils.py
 src/upgini/utils/display_utils.py
 src/upgini/utils/email_utils.py
 src/upgini/utils/fallback_progress_bar.py
tests/test_datetime_utils.py

@@ -127,7 +127,8 @@ def test_multivariate_timeseries_detection():


 def test_multivariate_time_series():
-    df = pd.DataFrame(…
+    df = pd.DataFrame(
+        {
             "date": [
                 "2020-01-01 00:00:00",
                 "2020-01-01 00:00:02",

@@ -135,44 +136,49 @@ def test_multivariate_time_series():
                 "2020-01-01 00:00:06",
                 "2020-01-01 00:00:08",
             ]
-    }
+        }
+    )
     assert not is_blocked_time_series(df, "date", ["date"])

-    df = pd.DataFrame({
-        "date": pd.date_range("2020-01-01", "2020-02-01")
-    })
+    df = pd.DataFrame({"date": pd.date_range("2020-01-01", "2020-02-01")})
     assert not is_blocked_time_series(df, "date", ["date"])

-    df = pd.DataFrame({
-        "date": pd.date_range("2020-01-01", "2021-01-01")
-    })
+    df = pd.DataFrame({"date": pd.date_range("2020-01-01", "2021-01-01")})
     assert is_blocked_time_series(df, "date", ["date"])

-    df1 = pd.DataFrame(
-        …
+    df1 = pd.DataFrame(
+        {
+            "date": pd.date_range("2020-01-01", "2021-01-01"),
+            "feature1": np.random.randint(0, 1000, 367),
+            "feature2": np.random.randint(0, 1000, 367),
+        }
+    )
+    df2 = pd.DataFrame(
+        {
+            "date": pd.date_range("2020-01-01", "2021-01-01"),
+            "feature1": np.random.randint(0, 1000, 367),
+            "feature2": np.random.randint(0, 1000, 367),
+        }
+    )
     df = pd.concat([df1, df2])
     assert is_blocked_time_series(df, "date", ["date"])

-    df1 = pd.DataFrame(
-        …
+    df1 = pd.DataFrame(
+        {
+            "date": pd.date_range("2020-01-01", "2021-01-01"),
+            "feature1": np.random.randint(0, 1000, 367),
+            "feature2": np.random.randint(0, 1000, 367),
+            "feature3": np.random.randint(0, 1000, 367),
+        }
+    )
+    df2 = pd.DataFrame(
+        {
+            "date": pd.date_range("2020-01-01", "2021-01-01"),
+            "feature1": np.random.randint(0, 1000, 367),
+            "feature2": np.random.randint(0, 1000, 367),
+            "feature3": np.random.randint(0, 1000, 367),
+        }
+    )
     df = pd.concat([df1, df2])
     assert not is_blocked_time_series(df, "date", ["date"])

tests/test_email_utils.py

@@ -58,7 +58,7 @@ def test_convertion_to_hem():
             None,
             None,
             None,
-            None
+            None,
         ],
         EmailSearchKeyConverter.EMAIL_ONE_DOMAIN_COLUMN_NAME: ["tgoogle.com", None, None, None, None, None],
         EmailSearchKeyConverter.DOMAIN_COLUMN_NAME: ["google.com", None, None, None, None, None],
tests/test_etalon_validation.py

@@ -58,9 +58,14 @@ def test_string_ip_to_int_conversion():
             {"ip": None},
         ]
     )
-    dataset = Dataset(
-        "…
+    dataset = Dataset(
+        "test",
+        df=df,
+        search_keys=[("ip",)],
+        meaning_types={
+            "ip": FileColumnMeaningType.IP_ADDRESS,
+        },
+    )
     dataset._Dataset__rename_columns()
     dataset._Dataset__convert_ip()
     assert dataset.data["ip_bb9af5_v4"].dtype == "Int64"

@@ -77,7 +82,7 @@ def test_python_ip_to_int_conversion():
             {"ip": ipaddress.ip_address("192.168.1.1")},
         ]
     )
-    dataset = Dataset("test", df=df, search_keys=[("ip",…
+    dataset = Dataset("test", df=df, search_keys=[("ip",)])
     dataset.meaning_types = {
         "ip": FileColumnMeaningType.IP_ADDRESS,
     }

@@ -91,7 +96,7 @@ def test_python_ip_to_int_conversion():

 def test_ip_v6_conversion():
     df = pd.DataFrame({"ip": ["::cf:befe:525b"]})
-    dataset = Dataset("test", df=df, search_keys=[("ip",…
+    dataset = Dataset("test", df=df, search_keys=[("ip",)])
     dataset.meaning_types = {
         "ip": FileColumnMeaningType.IP_ADDRESS,
     }

@@ -107,7 +112,7 @@ def test_int_ip_to_int_conversion():
     df = pd.DataFrame(
         {"ip": [3232235777, 892262568539]},
     )
-    dataset = Dataset("test", df=df, search_keys=[("ip",…
+    dataset = Dataset("test", df=df, search_keys=[("ip",)])  # type: ignore
     dataset.meaning_types = {
         "ip": FileColumnMeaningType.IP_ADDRESS,
     }

@@ -615,9 +620,7 @@ def test_columns_renaming():

     df = pd.concat([df1, df2], axis=1)

-    dataset = Dataset(
-        "tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
-    )
+    dataset = Dataset("tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)])
     dataset._Dataset__rename_columns()
     print(dataset)
     assert set(dataset.data.columns.to_list()) == {"feature1_422b73", "date_0e8763", "feature1_422b73_0"}

@@ -632,9 +635,7 @@ def test_too_long_columns():
         }
     )

-    dataset = Dataset(
-        "tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)]
-    )
+    dataset = Dataset("tds", df=df, meaning_types={"date": FileColumnMeaningType.DATE}, search_keys=[("date",)])
     dataset._Dataset__rename_columns()
     print(dataset)
     assert set(dataset.data.columns.to_list()) == {
tests/test_widget.py

@@ -417,7 +417,7 @@ def test_widget(requests_mock: Mocker):
             '<button kind="secondary"><p>Instant purchase</p></button></a></div>'
         ),
         (
-            …
+            '<div class="stButton"><a href=\'https://app.snowflake.com/marketplace/listing/GZSTZ3VDMF6/'
             "?referer=upgini' target='_blank' rel='noopener noreferrer'><button kind=\"secondary\"><p>"
             "Instant purchase</p></button></a></div>"
         ),
src/upgini/utils/ip_utils.py

@@ -5,12 +5,12 @@ import pandas as pd
 from requests import get

 from upgini.metadata import SearchKey
+
 # from upgini.resource_bundle import bundle
 # from upgini.utils.track_info import get_track_metrics


 class IpToCountrySearchKeyConverter:
-
     url = "http://ip-api.com/json/{}"

     def __init__(
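The url template targets ip-api.com's JSON endpoint, which returns fields such as countryCode on success. A rough sketch of the lookup such a converter performs (a real implementation would batch requests, cache results, and handle failures):

    import pandas as pd
    from requests import get

    def ip_to_country(ip: str) -> str:
        response = get("http://ip-api.com/json/{}".format(ip)).json()
        return response.get("countryCode", "")

    df = pd.DataFrame({"ip": ["8.8.8.8"]})
    df["country"] = df["ip"].apply(ip_to_country)  # -> ["US"]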