PyPI - upgini - Versions diffs - 1.2.117__py3-none-any.whl → 1.2.118__py3-none-any.whl - Mend

upgini 1.2.117__py3-none-any.whl → 1.2.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

upgini/__about__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.2.117"
1	+ __version__ = "1.2.118"

upgini/features_enricher.py CHANGED Viewed

@@ -1423,8 +1423,15 @@ class FeaturesEnricher(TransformerMixin):
         # Find latest eval set or earliest if all eval sets are before train set
         date_column = self._get_date_column(search_keys)
-        # Get minimum date from main dataset X
-        main_min_date = X[date_column].dropna().min()
+        x_date = X[date_column].dropna()
+        if not is_numeric_dtype(x_date):
+            x_date = pd.to_datetime(x_date).dt.floor("D").astype(np.int64) / 10**6
+        main_min_date = x_date.min()
+        for eval_x, _ in eval_set:
+            eval_x_date = eval_x[date_column].dropna()
+            if not is_numeric_dtype(eval_x_date):
+                eval_x[date_column] = pd.to_datetime(eval_x_date).dt.floor("D").astype(np.int64) / 10**6
         # Find minimum date for each eval_set and compare with main dataset
         eval_dates = []
@@ -1433,8 +1440,11 @@ class FeaturesEnricher(TransformerMixin):
                 if len(eval_x) < 1000:
                     self.logger.warning(f"Eval_set {i} has less than 1000 rows. It will be ignored for stability check")
                     continue
-                eval_min_date = eval_x[date_column].dropna().min()
-                eval_max_date = eval_x[date_column].dropna().max()
+                eval_x_date = eval_x[date_column].dropna()
+                if not is_numeric_dtype(eval_x_date):
+                    eval_x_date = pd.to_datetime(eval_x_date).dt.floor("D").astype(np.int64) / 10**6
+                eval_min_date = eval_x_date.min()
+                eval_max_date = eval_x_date.max()
                 eval_dates.append((i, eval_min_date, eval_max_date))
         if not eval_dates:
@@ -1460,6 +1470,10 @@ class FeaturesEnricher(TransformerMixin):
         checking_eval_set_df = checking_eval_set_df.copy()
         checking_eval_set_df[date_column] = eval_set_dates[selected_eval_set_idx]
+        if not is_numeric_dtype(checking_eval_set_df[date_column]):
+            checking_eval_set_df[date_column] = (
+                pd.to_datetime(checking_eval_set_df[date_column]).dt.floor("D").astype(np.int64) / 10**6
+            )
         psi_values_sparse = calculate_sparsity_psi(
             checking_eval_set_df, cat_features, date_column, self.logger, model_task_type
@@ -3708,6 +3722,25 @@ if response.status_code == 200:
             else:
                 raise ValidationError(self.bundle.get("eval_x_and_x_diff_shape"))
+        if any(validated_eval_X.dtypes != X.dtypes):
+            x_types = X.dtypes
+            eval_types = validated_eval_X.dtypes
+            # Find columns with different types
+            diff_cols = [
+                (col, x_types[col], eval_types[col])
+                for col in x_types.index
+                if x_types[col] != eval_types[col]
+            ]
+            diff_col_names = [col for col, _, _ in diff_cols]
+            # print columns with different types
+            print("Columns with different types:")
+            for col, x_type, eval_type in diff_cols:
+                print("-" * 50)
+                print(f"Column: {col}")
+                print(f"X type:        {x_type}")
+                print(f"Eval_set type: {eval_type}")
+            raise ValidationError(self.bundle.get("eval_x_and_x_diff_dtypes").format(diff_col_names))
         if _num_samples(validated_eval_X) != _num_samples(eval_y):
             raise ValidationError(
                 self.bundle.get("x_and_y_diff_size_eval_set").format(
@@ -4420,7 +4453,8 @@ if response.status_code == 200:
         if len(features_info) > 0:
             self.features_info = pd.DataFrame(features_info)
-            if self.features_info[self.bundle.get("features_info_psi")].isna().all():
+            # If all psi values are 0 or null, drop psi column
+            if self.features_info[self.bundle.get("features_info_psi")].fillna(0.0).eq(0.0).all():
                 self.features_info.drop(columns=[self.bundle.get("features_info_psi")], inplace=True)
             self._features_info_without_links = pd.DataFrame(features_info_without_links)
             self._internal_features_info = pd.DataFrame(internal_features_info)

upgini/resource_bundle/strings.properties CHANGED Viewed

@@ -123,6 +123,7 @@ unsupported_type_eval_set=Unsupported type of eval_set: {}. It should be list of
 eval_set_invalid_tuple_size=eval_set contains a tuple of size {}. It should contain only pairs of X and y or X only
 unsupported_x_type_eval_set=Unsupported type of X in eval_set: {}. Use pandas.DataFrame, pandas.Series or numpy.ndarray or list.
 eval_x_and_x_diff_shape=The column set in eval_set are differ from the column set in X
+eval_x_and_x_diff_dtypes=The column types in eval_set are different from the column types in X: {}
 unsupported_y_type_eval_set=Unsupported type of y in eval_set: {}. Use pandas.Series, numpy.ndarray or list
 y_is_constant_eval_set=y in eval_set is a constant. Relevant feature search requires a non-constant y
 x_and_y_diff_size_eval_set=X and y in eval_set contain different number of rows: {}, {}

upgini/utils/psi.py CHANGED Viewed

@@ -82,9 +82,6 @@ def calculate_features_psi(
 ) -> dict[str, float]:
     empty_res = {col: 0.0 for col in df.columns if col not in [TARGET, date_column]}
-    if not is_numeric_dtype(df[date_column]):
-        df[date_column] = pd.to_datetime(df[date_column]).dt.floor("D").astype(np.int64) / 10**6
     # Filter out rows with missing dates
     df = df[df[date_column].notna()].copy()

{upgini-1.2.117.dist-info → upgini-1.2.118.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.117
+Version: 1.2.118
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/

{upgini-1.2.117.dist-info → upgini-1.2.118.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-upgini/__about__.py,sha256=MY7Fho70n94XaciDTteAp4gxVleTVF6idcG3aECjijI,24
+upgini/__about__.py,sha256=q02CtZPV2DVtBrD7C_RFfsbI15l7QGmxfUzcCx5UykM,24
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
 upgini/dataset.py,sha256=pQ8JQe0cdygD-W9GefJmfE6bnj4EYzXsjlgWdIS9nS8,31578
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=iYay-Ye5WGntieg3X7uyg9W3x_1FUELrmhJnJIvQMeI,228897
+upgini/features_enricher.py,sha256=zGWU8l6dWZwV1fsQD-j9tTKP9X6mUO9HPnwcGrJFS8o,230596
 upgini/http.py,sha256=-J_wOpnwVnT0ebPC6sOs6fN3AWtCD0LJLu6nlYmxaqk,44348
 upgini/metadata.py,sha256=VzgtgEbPPtNxTrj9LM5qSDP3DujHwAXqbUSKBjPcb9c,12477
 upgini/metrics.py,sha256=KCPE_apPN-9BIdv6GqASbJVaB_gBcy8wzNApAcyaGo4,46020
@@ -38,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 upgini/normalizer/normalize_utils.py,sha256=mDh2mBW3aQMB4EFP2aHbf2dGMVkOcWnp4sKKvKDBh8w,8511
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=VbfRqgg2kuWqmUyieGNxuY5dy0TpOQ-L3fHlWB7o2_w,29186
+upgini/resource_bundle/strings.properties,sha256=cNeVkWZMyjGCYGqmOOeJqisqPSEBtmfIw_U1rmgQw4w,29285
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=Fva2FEhLiNRPZ9Q6uOtJRtRzwsayjv7aphalAZO_4lc,6452
@@ -66,7 +66,7 @@ upgini/utils/mstats.py,sha256=u3gQVUtDRbyrOQK6V1UJ2Rx1QbkSNYGjXa6m3Z_dPVs,6286
 upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
 upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
 upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
-upgini/utils/psi.py,sha256=vuVAo5-4HplpblQA7BP8bouI8VQDEb___MW98WQ6ik8,11258
+upgini/utils/psi.py,sha256=vw8QEktXSx29IiMJMxmDeFU_4lJInJBXt_XL5Muekzo,11114
 upgini/utils/sample_utils.py,sha256=xpfYaZ2cYP7I2JrcooVc13QNBFawB81cJRuh38451Q4,15123
 upgini/utils/sklearn_ext.py,sha256=jLJWAKkqQinV15Z4y1ZnsN3c-fKFwXTsprs00COnyVU,49315
 upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
@@ -74,7 +74,7 @@ upgini/utils/target_utils.py,sha256=GCPn4QeJ83JJ_vyBJ3IhY5fyIRkLC9q9BE59S2FRO1I,
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
 upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.117.dist-info/METADATA,sha256=3onXIkh96-rh_Q0DIuHUihe07upcEFukS5WsVS1R2yc,50743
-upgini-1.2.117.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-upgini-1.2.117.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.117.dist-info/RECORD,,
+upgini-1.2.118.dist-info/METADATA,sha256=fEdGQaho0hyf9dXC_fL1AxuJFI46-zCMvm_U_O6hOec,50743
+upgini-1.2.118.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+upgini-1.2.118.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.118.dist-info/RECORD,,

{upgini-1.2.117.dist-info → upgini-1.2.118.dist-info}/WHEEL RENAMED Viewed

File without changes

{upgini-1.2.117.dist-info → upgini-1.2.118.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

upgini 1.2.117__py3-none-any.whl → 1.2.118__py3-none-any.whl

upgini 1.2.117__py3-none-any.whl → 1.2.118__py3-none-any.whl