upgini-1.2.26-py3-none-any.whl → upgini-1.2.27-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.26"
1
+ __version__ = "1.2.27"
upgini/__init__.py CHANGED
@@ -1,13 +1,5 @@
1
- import os
2
-
3
1
  from upgini.features_enricher import FeaturesEnricher # noqa: F401
4
2
  from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
5
- # from .lazy_import import LazyImport
6
-
7
- os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
3
+ import warnings
8
4
 
9
- # FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
10
- # SearchKey = LazyImport("upgini.metadata", "SearchKey")
11
- # RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
12
- # CVType = LazyImport("upgini.metadata", "CVType")
13
- # ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
5
+ warnings.filterwarnings("ignore", category=UserWarning, module="_distutils_hack")
@@ -2546,9 +2546,11 @@ class FeaturesEnricher(TransformerMixin):
2546
2546
  self.fit_generated_features.extend(generator.generated_features)
2547
2547
 
2548
2548
  # Checks that need validated date
2549
-
2550
- if not is_dates_distribution_valid(df, self.fit_search_keys):
2551
- self.__log_warning(bundle.get("x_unstable_by_date"))
2549
+ try:
2550
+ if not is_dates_distribution_valid(df, self.fit_search_keys):
2551
+ self.__log_warning(bundle.get("x_unstable_by_date"))
2552
+ except Exception:
2553
+ self.logger.exception("Failed to check dates distribution validity")
2552
2554
 
2553
2555
  if (
2554
2556
  is_numeric_dtype(df[self.TARGET_NAME])
@@ -4038,15 +4040,19 @@ class FeaturesEnricher(TransformerMixin):
4038
4040
  half_train = round(len(train) / 2)
4039
4041
  part1 = train[:half_train]
4040
4042
  part2 = train[half_train:]
4041
- train_psi = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
4042
- if train_psi > 0.2:
4043
- self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi))
4043
+ train_psi_result = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
4044
+ if isinstance(train_psi_result, Exception):
4045
+ self.logger.exception("Failed to calculate train PSI", train_psi_result)
4046
+ elif train_psi_result > 0.2:
4047
+ self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi_result))
4044
4048
 
4045
4049
  # 2. Check train-test PSI
4046
4050
  if eval1 is not None:
4047
- train_test_psi = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
4048
- if train_test_psi > 0.2:
4049
- self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi))
4051
+ train_test_psi_result = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
4052
+ if isinstance(train_test_psi_result, Exception):
4053
+ self.logger.exception("Failed to calculate test PSI", train_test_psi_result)
4054
+ elif train_test_psi_result > 0.2:
4055
+ self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi_result))
4050
4056
 
4051
4057
  def _dump_python_libs(self):
4052
4058
  try:
@@ -229,25 +229,25 @@ def balance_undersample(
229
229
  return resampled_data
230
230
 
231
231
 
232
- def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
233
- df = pd.concat([expected, actual])
232
+ def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Exception]:
233
+ try:
234
+ df = pd.concat([expected, actual])
234
235
 
235
- if is_bool_dtype(df):
236
- df = np.where(df, 1, 0)
236
+ if is_bool_dtype(df):
237
+ df = np.where(df, 1, 0)
237
238
 
238
- # Define the bins for the target variable
239
- df_min = df.min()
240
- df_max = df.max()
241
- bins = [df_min, (df_min + df_max) / 2, df_max]
239
+ # Define the bins for the target variable
240
+ df_min = df.min()
241
+ df_max = df.max()
242
+ bins = [df_min, (df_min + df_max) / 2, df_max]
242
243
 
243
- # Calculate the base distribution
244
- train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
244
+ # Calculate the base distribution
245
+ train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
245
246
 
246
- # Calculate the target distribution
247
- test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
247
+ # Calculate the target distribution
248
+ test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
248
249
 
249
- # Calculate the PSI
250
- try:
250
+ # Calculate the PSI
251
251
  return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
252
- except Exception:
253
- return np.nan
252
+ except Exception as e:
253
+ return e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.26
3
+ Version: 1.2.27
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,9 +1,9 @@
1
- upgini/__about__.py,sha256=8uhHORUThu_bp8-miLA4O6Em4FxNLHbY_K0t053StGY,23
2
- upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
1
+ upgini/__about__.py,sha256=JKArgvnX6ljUI_WxYnXTejXGdjsA4KJ3Cy2xBcK4vh4,23
2
+ upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=V1zzUHYzmhdouZVdoFBbMH1OIqUCqV1p0mIXfcTvj6Y,193614
6
+ upgini/features_enricher.py,sha256=7xYxcLvxQgDX7vE2gWEbBPceAVeEgBVpu9xtBJvXpoQ,194078
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
@@ -55,10 +55,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
55
55
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
56
56
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
57
57
  upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
58
- upgini/utils/target_utils.py,sha256=8R11IpwHxaEPJ5T2lcxXyeGFDuN6vquwlJep4ack-Ug,10159
58
+ upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
59
59
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
60
60
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
61
- upgini-1.2.26.dist-info/METADATA,sha256=vPNo6WUZ5Ypkvs0dVfV8i5sch4pCwsC_QkOac5SOZqA,48578
62
- upgini-1.2.26.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
63
- upgini-1.2.26.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
64
- upgini-1.2.26.dist-info/RECORD,,
61
+ upgini-1.2.27.dist-info/METADATA,sha256=iSB1iB7EwBugIUf8DYOz9mEFqGewDAS49-hgYuhtrtU,48578
62
+ upgini-1.2.27.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
63
+ upgini-1.2.27.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
64
+ upgini-1.2.27.dist-info/RECORD,,