upgini 1.2.88a3884.dev0__py3-none-any.whl → 1.2.88a3884.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.88a3884.dev0"
1
+ __version__ = "1.2.88a3884.dev1"
@@ -1671,6 +1671,10 @@ class FeaturesEnricher(TransformerMixin):
1671
1671
  enriched_eval_y_sorted,
1672
1672
  )
1673
1673
 
1674
+ fitting_X, fitting_enriched_X, fitting_eval_set_dict = self._convert_id_columns_to_int(
1675
+ fitting_X, fitting_enriched_X, fitting_eval_set_dict, columns_renaming
1676
+ )
1677
+
1674
1678
  return (
1675
1679
  validated_X,
1676
1680
  fitting_X,
@@ -1684,6 +1688,38 @@ class FeaturesEnricher(TransformerMixin):
1684
1688
  columns_renaming,
1685
1689
  )
1686
1690
 
1691
def _convert_id_columns_to_int(
    self,
    fitting_X: pd.DataFrame,
    fitting_enriched_X: pd.DataFrame,
    fitting_eval_set_dict: Dict[int, Tuple[pd.DataFrame, pd.Series]],
    columns_renaming: Optional[Dict[str, str]] = None,
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[int, Tuple[pd.DataFrame, pd.Series]]]:
    """Replace every id column with integer category codes shared by all frames.

    For each name in ``self.id_columns`` the values from ``fitting_X``,
    ``fitting_enriched_X`` and the feature frame of every eval-set pair are
    concatenated and encoded together, so the same original value gets the
    same integer code in every frame. The frames are modified in place.

    Args:
        fitting_X: Frame holding the id columns; updated in place.
        fitting_enriched_X: Second frame holding the same id columns;
            updated in place.
        fitting_eval_set_dict: Mapping whose values are pairs; element 0 of
            each pair is a frame with the same id columns, updated in place.
        columns_renaming: Optional mapping that is inverted (value -> key)
            to translate a name from ``self.id_columns`` into the column
            name actually present in the frames. ``None`` means no renaming.

    Returns:
        The same three objects that were passed in
        ``(fitting_X, fitting_enriched_X, fitting_eval_set_dict)``.

    Raises:
        KeyError: If a resolved id column is missing from any of the frames.
    """
    if not self.id_columns:
        return fitting_X, fitting_enriched_X, fitting_eval_set_dict

    # A None default avoids sharing one mutable dict between calls.
    inverse_columns_renaming = {v: k for k, v in (columns_renaming or {}).items()}

    # All frames that must share one encoding, in a fixed order.
    frames = [fitting_X, fitting_enriched_X]
    frames.extend(eval_set_pair[0] for eval_set_pair in fitting_eval_set_dict.values())

    self.logger.info(f"Convert id columns to int: {self.id_columns}")
    for id_column in self.id_columns:
        col = inverse_columns_renaming.get(id_column, id_column)
        combined_col = pd.concat([frame[col] for frame in frames], ignore_index=True)
        # Category codes are assigned over the combined values; pandas uses
        # -1 as the code for missing values.
        codes = combined_col.astype("category").cat.codes.to_numpy()

        # Write the codes back positionally: each frame owns the next
        # len(frame) entries, so no index alignment is involved.
        offset = 0
        for frame in frames:
            stop = offset + len(frame)
            frame[col] = codes[offset:stop]
            offset = stop

    return fitting_X, fitting_enriched_X, fitting_eval_set_dict
1722
+
1687
1723
  @dataclass
1688
1724
  class _SampledDataForMetrics:
1689
1725
  X_sampled: pd.DataFrame
@@ -3976,7 +4012,7 @@ if response.status_code == 200:
3976
4012
  if features_meta is None:
3977
4013
  raise Exception(self.bundle.get("missing_features_meta"))
3978
4014
 
3979
- return [f.name for f in features_meta if f.type == "categorical"]
4015
+ return [f.name for f in features_meta if f.type == "categorical" and f.name not in self.id_columns]
3980
4016
 
3981
4017
  def __prepare_feature_importances(
3982
4018
  self, trace_id: str, df: pd.DataFrame, updated_shaps: Optional[Dict[str, float]] = None, silent=False
upgini/metrics.py CHANGED
@@ -332,7 +332,7 @@ class EstimatorWrapper:
332
332
  self.groups = groups
333
333
  self.text_features = text_features
334
334
  self.logger = logger or logging.getLogger()
335
- self.droped_features = []
335
+ self.dropped_features = []
336
336
  self.converted_to_int = []
337
337
  self.converted_to_str = []
338
338
  self.converted_to_numeric = []
@@ -381,10 +381,11 @@ class EstimatorWrapper:
381
381
  x, y, groups = self._prepare_data(x, y, groups=self.groups)
382
382
 
383
383
  self.logger.info(f"Before preparing data columns: {x.columns.to_list()}")
384
- self.droped_features = []
384
+ self.dropped_features = []
385
385
  self.converted_to_int = []
386
386
  self.converted_to_str = []
387
387
  self.converted_to_numeric = []
388
+
388
389
  for c in x.columns:
389
390
 
390
391
  if _get_unique_count(x[c]) < 2:
@@ -392,7 +393,7 @@ class EstimatorWrapper:
392
393
  if c in self.cat_features:
393
394
  self.cat_features.remove(c)
394
395
  x.drop(columns=[c], inplace=True)
395
- self.droped_features.append(c)
396
+ self.dropped_features.append(c)
396
397
  elif self.text_features is not None and c in self.text_features:
397
398
  x[c] = x[c].astype(str)
398
399
  self.converted_to_str.append(c)
@@ -427,16 +428,16 @@ class EstimatorWrapper:
427
428
  except (ValueError, TypeError):
428
429
  self.logger.warning(f"Remove feature {c} because it is not numeric and not in cat_features")
429
430
  x.drop(columns=[c], inplace=True)
430
- self.droped_features.append(c)
431
+ self.dropped_features.append(c)
431
432
 
432
433
  return x, y, groups, {}
433
434
 
434
435
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
435
436
  x, y, _ = self._prepare_data(x, y)
436
437
 
437
- if self.droped_features:
438
- self.logger.info(f"Drop features on calculate metrics: {self.droped_features}")
439
- x = x.drop(columns=self.droped_features)
438
+ if self.dropped_features:
439
+ self.logger.info(f"Drop features on calculate metrics: {self.dropped_features}")
440
+ x = x.drop(columns=self.dropped_features)
440
441
 
441
442
  if self.converted_to_int:
442
443
  self.logger.info(f"Convert to int features on calculate metrics: {self.converted_to_int}")
@@ -797,7 +798,7 @@ class CatBoostWrapper(EstimatorWrapper):
797
798
  )
798
799
  for f in high_cardinality_features:
799
800
  self.text_features.remove(f)
800
- self.droped_features.append(f)
801
+ self.dropped_features.append(f)
801
802
  x = x.drop(columns=f, errors="ignore")
802
803
  return super().cross_val_predict(x, y, baseline_score_column)
803
804
  else:
@@ -897,7 +898,7 @@ class LightGBMWrapper(EstimatorWrapper):
897
898
  for c in x.columns:
898
899
  if x[c].dtype not in ["category", "int64", "float64", "bool"]:
899
900
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
900
- self.droped_features.append(c)
901
+ self.dropped_features.append(c)
901
902
  x = x.drop(columns=c, errors="ignore")
902
903
  return x, y_numpy, groups, params
903
904
 
@@ -988,7 +989,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
988
989
  for c in x.columns:
989
990
  if x[c].dtype not in ["category", "int64", "float64", "bool"]:
990
991
  self.logger.warning(f"Feature {c} is not numeric and will be dropped")
991
- self.droped_features.append(c)
992
+ self.dropped_features.append(c)
992
993
  x = x.drop(columns=c, errors="ignore")
993
994
  return x, y_numpy, groups, params
994
995
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.88a3884.dev0
3
+ Version: 1.2.88a3884.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=9UxVEFo0h8LcuPSKD5JSZ_n02IZF15Ksx8d1ITu4M7U,33
1
+ upgini/__about__.py,sha256=RCAVI4TwhC_It_MBONjiSYbrXtFotET-nMOyORfyw40,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=fRtqSkXNONLnPe6cCL967GMt349FTIpXzy_u8LUKncw,35354
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=eFnJVb8jM1INlT-imfjafhWtOfx9EJv2HSvlfyGy0_U,216188
6
+ upgini/features_enricher.py,sha256=kkNePcLwHKNOLuZLDD8XcIHKVoo_VPUbUM4TSwey60I,218038
7
7
  upgini/http.py,sha256=6Qcepv0tDC72mBBJxYHnA2xqw6QwFaKrXN8o4vju8Es,44372
8
8
  upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
- upgini/metrics.py,sha256=zIOaiyfQLedU9Fk4877drnlWh-KiImSkZpPeiq6Xr1E,45295
9
+ upgini/metrics.py,sha256=ju7JPwLUe8vtFUGbBV6w6ecySd952XucrqToc1edVBs,45306
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.88a3884.dev0.dist-info/METADATA,sha256=e_lwt9ydR712gQBymukF9Lc2W-5aqj5nrZa-6T-UXA4,49172
74
- upgini-1.2.88a3884.dev0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
- upgini-1.2.88a3884.dev0.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.88a3884.dev0.dist-info/RECORD,,
73
+ upgini-1.2.88a3884.dev1.dist-info/METADATA,sha256=KPOdFTBugj7fEYybkMyXP9uABuM75J-eKJmF7V-mEMs,49172
74
+ upgini-1.2.88a3884.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
75
+ upgini-1.2.88a3884.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.88a3884.dev1.dist-info/RECORD,,