upgini 1.2.26__py3-none-any.whl → 1.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/__init__.py +2 -10
- upgini/features_enricher.py +15 -9
- upgini/utils/target_utils.py +16 -16
- {upgini-1.2.26.dist-info → upgini-1.2.27.dist-info}/METADATA +1 -1
- {upgini-1.2.26.dist-info → upgini-1.2.27.dist-info}/RECORD +8 -8
- {upgini-1.2.26.dist-info → upgini-1.2.27.dist-info}/WHEEL +0 -0
- {upgini-1.2.26.dist-info → upgini-1.2.27.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.26"
|
|
1
|
+
__version__ = "1.2.27"
|
upgini/__init__.py
CHANGED
|
@@ -1,13 +1,5 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
3
1
|
from upgini.features_enricher import FeaturesEnricher # noqa: F401
|
|
4
2
|
from upgini.metadata import SearchKey, CVType, RuntimeParameters, ModelTaskType # noqa: F401
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
|
|
3
|
+
import warnings
|
|
8
4
|
|
|
9
|
-
|
|
10
|
-
# SearchKey = LazyImport("upgini.metadata", "SearchKey")
|
|
11
|
-
# RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
|
|
12
|
-
# CVType = LazyImport("upgini.metadata", "CVType")
|
|
13
|
-
# ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
|
|
5
|
+
warnings.filterwarnings("ignore", category=UserWarning, module="_distutils_hack")
|
upgini/features_enricher.py
CHANGED
|
@@ -2546,9 +2546,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2546
2546
|
self.fit_generated_features.extend(generator.generated_features)
|
|
2547
2547
|
|
|
2548
2548
|
# Checks that need validated date
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2549
|
+
try:
|
|
2550
|
+
if not is_dates_distribution_valid(df, self.fit_search_keys):
|
|
2551
|
+
self.__log_warning(bundle.get("x_unstable_by_date"))
|
|
2552
|
+
except Exception:
|
|
2553
|
+
self.logger.exception("Failed to check dates distribution validity")
|
|
2552
2554
|
|
|
2553
2555
|
if (
|
|
2554
2556
|
is_numeric_dtype(df[self.TARGET_NAME])
|
|
@@ -4038,15 +4040,19 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
4038
4040
|
half_train = round(len(train) / 2)
|
|
4039
4041
|
part1 = train[:half_train]
|
|
4040
4042
|
part2 = train[half_train:]
|
|
4041
|
-
|
|
4042
|
-
if
|
|
4043
|
-
self.
|
|
4043
|
+
train_psi_result = calculate_psi(part1[self.TARGET_NAME], part2[self.TARGET_NAME])
|
|
4044
|
+
if isinstance(train_psi_result, Exception):
|
|
4045
|
+
self.logger.exception("Failed to calculate train PSI", train_psi_result)
|
|
4046
|
+
elif train_psi_result > 0.2:
|
|
4047
|
+
self.__log_warning(self.bundle.get("train_unstable_target").format(train_psi_result))
|
|
4044
4048
|
|
|
4045
4049
|
# 2. Check train-test PSI
|
|
4046
4050
|
if eval1 is not None:
|
|
4047
|
-
|
|
4048
|
-
if
|
|
4049
|
-
self.
|
|
4051
|
+
train_test_psi_result = calculate_psi(train[self.TARGET_NAME], eval1[self.TARGET_NAME])
|
|
4052
|
+
if isinstance(train_test_psi_result, Exception):
|
|
4053
|
+
self.logger.exception("Failed to calculate test PSI", train_test_psi_result)
|
|
4054
|
+
elif train_test_psi_result > 0.2:
|
|
4055
|
+
self.__log_warning(self.bundle.get("eval_unstable_target").format(train_test_psi_result))
|
|
4050
4056
|
|
|
4051
4057
|
def _dump_python_libs(self):
|
|
4052
4058
|
try:
|
upgini/utils/target_utils.py
CHANGED
|
@@ -229,25 +229,25 @@ def balance_undersample(
|
|
|
229
229
|
return resampled_data
|
|
230
230
|
|
|
231
231
|
|
|
232
|
-
def calculate_psi(expected: pd.Series, actual: pd.Series) -> float:
|
|
233
|
-
|
|
232
|
+
def calculate_psi(expected: pd.Series, actual: pd.Series) -> Union[float, Exception]:
|
|
233
|
+
try:
|
|
234
|
+
df = pd.concat([expected, actual])
|
|
234
235
|
|
|
235
|
-
|
|
236
|
-
|
|
236
|
+
if is_bool_dtype(df):
|
|
237
|
+
df = np.where(df, 1, 0)
|
|
237
238
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
239
|
+
# Define the bins for the target variable
|
|
240
|
+
df_min = df.min()
|
|
241
|
+
df_max = df.max()
|
|
242
|
+
bins = [df_min, (df_min + df_max) / 2, df_max]
|
|
242
243
|
|
|
243
|
-
|
|
244
|
-
|
|
244
|
+
# Calculate the base distribution
|
|
245
|
+
train_distribution = expected.value_counts(bins=bins, normalize=True).sort_index().values
|
|
245
246
|
|
|
246
|
-
|
|
247
|
-
|
|
247
|
+
# Calculate the target distribution
|
|
248
|
+
test_distribution = actual.value_counts(bins=bins, normalize=True).sort_index().values
|
|
248
249
|
|
|
249
|
-
|
|
250
|
-
try:
|
|
250
|
+
# Calculate the PSI
|
|
251
251
|
return np.sum((train_distribution - test_distribution) * np.log(train_distribution / test_distribution))
|
|
252
|
-
except Exception:
|
|
253
|
-
return
|
|
252
|
+
except Exception as e:
|
|
253
|
+
return e
|
|
{upgini-1.2.26.dist-info → upgini-1.2.27.dist-info}/RECORD
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
2
|
-
upgini/__init__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=JKArgvnX6ljUI_WxYnXTejXGdjsA4KJ3Cy2xBcK4vh4,23
|
|
2
|
+
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=7xYxcLvxQgDX7vE2gWEbBPceAVeEgBVpu9xtBJvXpoQ,194078
|
|
7
7
|
upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
|
|
@@ -55,10 +55,10 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
|
|
|
55
55
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
56
56
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
57
57
|
upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
|
|
58
|
-
upgini/utils/target_utils.py,sha256=
|
|
58
|
+
upgini/utils/target_utils.py,sha256=PU77nIhTz7IHbC4rpTpxrVxib6cdpRL9F1dhkjIffLY,10225
|
|
59
59
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
60
60
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
61
|
-
upgini-1.2.
|
|
62
|
-
upgini-1.2.26.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
-
upgini-1.2.26.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
-
upgini-1.2.26.dist-info/RECORD,,
|
|
61
|
+
upgini-1.2.27.dist-info/METADATA,sha256=iSB1iB7EwBugIUf8DYOz9mEFqGewDAS49-hgYuhtrtU,48578
|
|
62
|
+
upgini-1.2.27.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
63
|
+
upgini-1.2.27.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
64
|
+
upgini-1.2.27.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|