upgini 1.2.8__py3-none-any.whl → 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.8"
1
+ __version__ = "1.2.9"
upgini/features_enricher.py CHANGED
@@ -1103,7 +1103,7 @@ class FeaturesEnricher(TransformerMixin):
1103
1103
  else:
1104
1104
  eval_uplift = None
1105
1105
 
1106
- effective_eval_set = eval_set if eval_set is not None else self.eval_set
1106
+ # effective_eval_set = eval_set if eval_set is not None else self.eval_set
1107
1107
  eval_metrics = {
1108
1108
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
1109
1109
  "quality_metrics_eval_segment"
@@ -1369,6 +1369,7 @@ class FeaturesEnricher(TransformerMixin):
1369
1369
  + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID]
1370
1370
  )
1371
1371
  ]
1372
+ self.logger.info(f"Client features column on prepare data for metrics: {client_features}")
1372
1373
 
1373
1374
  filtered_enriched_features = self.__filtered_enriched_features(
1374
1375
  importance_threshold,
@@ -1435,31 +1436,19 @@ class FeaturesEnricher(TransformerMixin):
1435
1436
  )
1436
1437
 
1437
1438
  fitting_eval_set_dict = {}
1439
+ fitting_x_columns = fitting_X.columns.to_list()
1440
+ self.logger.info(f"Final list of fitting X columns: {fitting_x_columns}")
1441
+ fitting_enriched_x_columns = fitting_enriched_X.columns.to_list()
1442
+ self.logger.info(f"Final list of fitting enriched X columns: {fitting_enriched_x_columns}")
1438
1443
  for idx, eval_tuple in eval_set_sampled_dict.items():
1439
1444
  eval_X_sampled, enriched_eval_X, eval_y_sampled = eval_tuple
1440
1445
  eval_X_sorted, eval_y_sorted = self._sort_by_system_record_id(eval_X_sampled, eval_y_sampled, self.cv)
1441
1446
  enriched_eval_X_sorted, enriched_eval_y_sorted = self._sort_by_system_record_id(
1442
1447
  enriched_eval_X, eval_y_sampled, self.cv
1443
1448
  )
1444
- fitting_eval_X = eval_X_sorted[client_features].copy()
1445
- fitting_enriched_eval_X = enriched_eval_X_sorted[
1446
- client_features + existing_filtered_enriched_features
1447
- ].copy()
1448
-
1449
- # # Drop high cardinality features in eval set
1450
- if len(columns_with_high_cardinality) > 0:
1451
- fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
1452
- fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
1453
- columns=columns_with_high_cardinality, errors="ignore"
1454
- )
1455
- # Drop constant features in eval_set
1456
- if len(constant_columns) > 0:
1457
- fitting_eval_X = fitting_eval_X.drop(columns=constant_columns, errors="ignore")
1458
- fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=constant_columns, errors="ignore")
1459
- # Drop datetime features in eval_set
1460
- if len(datetime_features) > 0:
1461
- fitting_eval_X = fitting_eval_X.drop(columns=datetime_features, errors="ignore")
1462
- fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=datetime_features, errors="ignore")
1449
+ fitting_eval_X = eval_X_sorted[fitting_x_columns].copy()
1450
+ fitting_enriched_eval_X = enriched_eval_X_sorted[fitting_enriched_x_columns].copy()
1451
+
1463
1452
  # Convert bool to string in eval_set
1464
1453
  if len(bool_columns) > 0:
1465
1454
  fitting_eval_X[col] = fitting_eval_X[col].astype(str)
@@ -1680,6 +1669,7 @@ class FeaturesEnricher(TransformerMixin):
1680
1669
  X_sampled = enriched_Xy[x_columns].copy()
1681
1670
  y_sampled = enriched_Xy[TARGET].copy()
1682
1671
  enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX], errors="ignore")
1672
+ enriched_X_columns = enriched_X.columns.to_list()
1683
1673
 
1684
1674
  self.logger.info(f"Shape of enriched_X: {enriched_X.shape}")
1685
1675
  self.logger.info(f"Shape of X after sampling: {X_sampled.shape}")
@@ -1694,7 +1684,7 @@ class FeaturesEnricher(TransformerMixin):
1694
1684
  for idx in range(len(eval_set)):
1695
1685
  eval_X_sampled = enriched_eval_sets[idx + 1][x_columns].copy()
1696
1686
  eval_y_sampled = enriched_eval_sets[idx + 1][TARGET].copy()
1697
- enriched_eval_X = enriched_eval_sets[idx + 1].drop(columns=[TARGET, EVAL_SET_INDEX])
1687
+ enriched_eval_X = enriched_eval_sets[idx + 1][enriched_X_columns].copy()
1698
1688
  eval_set_sampled_dict[idx] = (eval_X_sampled, enriched_eval_X, eval_y_sampled)
1699
1689
 
1700
1690
  self.__cached_sampled_datasets = (
@@ -1773,12 +1763,13 @@ class FeaturesEnricher(TransformerMixin):
1773
1763
  X_sampled = enriched_Xy[x_columns].copy()
1774
1764
  y_sampled = enriched_Xy[TARGET].copy()
1775
1765
  enriched_X = enriched_Xy.drop(columns=[TARGET, EVAL_SET_INDEX])
1766
+ enriched_X_columns = enriched_X.columns.tolist()
1776
1767
 
1777
1768
  for idx in range(len(eval_set)):
1778
1769
  enriched_eval_xy = enriched_df.query(f"{EVAL_SET_INDEX} == {idx + 1}")
1779
1770
  eval_x_sampled = enriched_eval_xy[x_columns].copy()
1780
1771
  eval_y_sampled = enriched_eval_xy[TARGET].copy()
1781
- enriched_eval_x = enriched_eval_xy.drop(columns=[TARGET, EVAL_SET_INDEX])
1772
+ enriched_eval_x = enriched_eval_xy[enriched_X_columns].copy()
1782
1773
  eval_set_sampled_dict[idx] = (eval_x_sampled, enriched_eval_x, eval_y_sampled)
1783
1774
  else:
1784
1775
  self.logger.info("Transform without eval_set")
upgini/metrics.py CHANGED
@@ -254,6 +254,7 @@ class EstimatorWrapper:
254
254
  def _prepare_data(
255
255
  self, x: pd.DataFrame, y: pd.Series, groups: Optional[np.ndarray] = None
256
256
  ) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
257
+ self.logger.info(f"Before preparing data columns: {x.columns.to_list()}")
257
258
  for c in x.columns:
258
259
  if is_numeric_dtype(x[c]):
259
260
  x[c] = x[c].astype(float)
@@ -272,6 +273,7 @@ class EstimatorWrapper:
272
273
  else:
273
274
  x, y = self._remove_empty_target_rows(x, y)
274
275
 
276
+ self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
275
277
  return x, y, groups
276
278
 
277
279
  def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series]:
@@ -493,7 +495,9 @@ class CatBoostWrapper(EstimatorWrapper):
493
495
  if x[name].nunique() > 1:
494
496
  unique_cat_features.append(name)
495
497
  else:
498
+ self.logger.info(f"Drop column {name} on preparing data for fit")
496
499
  x = x.drop(columns=name)
500
+ self.exclude_features.append(name)
497
501
  self.cat_features = unique_cat_features
498
502
  if (
499
503
  hasattr(self.estimator, "get_param")
upgini/utils/features_validator.py CHANGED
@@ -87,4 +87,4 @@ class FeaturesValidator:
87
87
 
88
88
  @staticmethod
89
89
  def find_constant_features(df: pd.DataFrame) -> List[str]:
90
- return [i for i in df if df[i].nunique() == 1]
90
+ return [i for i in df if df[i].nunique() <= 1]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.8
3
+ Version: 1.2.9
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=CfVXm0wwlKPW0khOcwhWw61TpgtZiLijCePsAIOK3aU,22
1
+ upgini/__about__.py,sha256=Oh3Y6CIypkhAjW-aquBTyP3_cA-gKgKTwq9EpcWpjps,22
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=J5-bn07iaJSXVN8COeu2RbDqZ4NTPd1L27HePaNh52o,188134
6
+ upgini/features_enricher.py,sha256=rC3Lq1KvwJdSiITAUfYzBxdRtPkpOo6X2fqc3wWQfM4,187594
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
10
- upgini/metrics.py,sha256=Tu5cN8RlhOSSMWUTXRSkdl8SWBqR1N_2eJpBum9pZxc,30926
10
+ upgini/metrics.py,sha256=Swp-innl6XrdK6Dy6uLTVxmkzPRqFbCxfYxQUsK_-w8,31222
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1389
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
47
47
  upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
48
48
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
- upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
50
+ upgini/utils/features_validator.py,sha256=LIF6YMpHlxCrVz6mvMpc1kfNTIMVGlNCor7IJTmlSfI,3307
51
51
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
52
52
  upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
53
53
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.8.dist-info/METADATA,sha256=ylfsesA9T7LuYFxcy-1HiIyMm5qgx8PmxOmkwk3K9sw,48607
61
- upgini-1.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.8.dist-info/RECORD,,
60
+ upgini-1.2.9.dist-info/METADATA,sha256=3mB0qUIeWVpka3vMeXq-t7STUZxVKQb5NpGBpFA9zlw,48607
61
+ upgini-1.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.9.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.9.dist-info/RECORD,,
File without changes