upgini 1.2.113a4__tar.gz → 1.2.113a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.113a4 → upgini-1.2.113a5}/PKG-INFO +1 -1
- upgini-1.2.113a5/src/upgini/__about__.py +1 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/features_enricher.py +17 -9
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/psi.py +26 -0
- upgini-1.2.113a4/src/upgini/__about__.py +0 -1
- {upgini-1.2.113a4 → upgini-1.2.113a5}/.gitignore +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/LICENSE +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/README.md +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/pyproject.toml +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/ads.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/dataset.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/errors.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/http.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/metadata.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/metrics.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/search_task.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/spinner.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.113a4 → upgini-1.2.113a5}/src/upgini/version_validator.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "1.2.113a5"
|
@@ -112,7 +112,7 @@ except Exception:
|
|
112
112
|
CustomFallbackProgressBar as ProgressBar,
|
113
113
|
)
|
114
114
|
|
115
|
-
from upgini.utils.psi import calculate_features_psi
|
115
|
+
from upgini.utils.psi import calculate_features_psi, calculate_sparsity_psi
|
116
116
|
from upgini.utils.sample_utils import SampleColumns, SampleConfig, _num_samples, sample
|
117
117
|
from upgini.utils.sort import sort_columns
|
118
118
|
from upgini.utils.target_utils import calculate_psi, define_task
|
@@ -1513,15 +1513,29 @@ class FeaturesEnricher(TransformerMixin):
|
|
1513
1513
|
|
1514
1514
|
checking_eval_set_df[date_column] = eval_set_dates[selected_eval_set_idx]
|
1515
1515
|
|
1516
|
+
psi_values_sparse = calculate_sparsity_psi(
|
1517
|
+
checking_eval_set_df, cat_features, date_column, self.logger, model_task_type
|
1518
|
+
)
|
1519
|
+
|
1520
|
+
unstable_by_sparsity = [feature for feature, psi in psi_values_sparse.items() if psi > stability_threshold]
|
1521
|
+
if unstable_by_sparsity:
|
1522
|
+
self.logger.info(f"Unstable by sparsity features: {sorted(unstable_by_sparsity)}")
|
1523
|
+
|
1516
1524
|
psi_values = calculate_features_psi(
|
1517
1525
|
checking_eval_set_df, cat_features, date_column, self.logger, model_task_type
|
1518
1526
|
)
|
1519
1527
|
|
1528
|
+
unstable_by_value = [feature for feature, psi in psi_values.items() if psi > stability_threshold]
|
1529
|
+
if unstable_by_value:
|
1530
|
+
self.logger.info(f"Unstable by value features: {sorted(unstable_by_value)}")
|
1531
|
+
|
1520
1532
|
self.psi_values = {
|
1521
1533
|
feature: psi_value for feature, psi_value in psi_values.items() if psi_value <= stability_threshold
|
1522
1534
|
}
|
1523
1535
|
|
1524
|
-
|
1536
|
+
total_unstable_features = sorted(set(unstable_by_sparsity + unstable_by_value))
|
1537
|
+
|
1538
|
+
return total_unstable_features
|
1525
1539
|
|
1526
1540
|
def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
|
1527
1541
|
renaming = self.fit_columns_renaming or {}
|
@@ -2273,13 +2287,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
2273
2287
|
enriched_df, x_columns, enriched_X.columns.tolist(), len(eval_set) if has_eval_set else 0
|
2274
2288
|
)
|
2275
2289
|
|
2276
|
-
|
2277
|
-
reversed_renaming = {v: k for k, v in columns_renaming.items()}
|
2278
|
-
X_sampled.rename(columns=reversed_renaming, inplace=True)
|
2279
|
-
enriched_X.rename(columns=reversed_renaming, inplace=True)
|
2280
|
-
for _, (eval_X_sampled, enriched_eval_X, _) in eval_set_sampled_dict.items():
|
2281
|
-
eval_X_sampled.rename(columns=reversed_renaming, inplace=True)
|
2282
|
-
enriched_eval_X.rename(columns=reversed_renaming, inplace=True)
|
2290
|
+
search_keys = {columns_renaming.get(k, k): v for k, v in search_keys.items()}
|
2283
2291
|
|
2284
2292
|
# Cache and return results
|
2285
2293
|
datasets_hash = hash_input(validated_X, validated_y, eval_set)
|
@@ -42,6 +42,32 @@ DEFAULT_FEATURES_PARAMS = StabilityParams(
|
|
42
42
|
)
|
43
43
|
|
44
44
|
|
45
|
+
def calculate_sparsity_psi(
|
46
|
+
df: pd.DataFrame,
|
47
|
+
cat_features: list[str],
|
48
|
+
date_column: str,
|
49
|
+
logger: logging.Logger,
|
50
|
+
model_task_type: ModelTaskType,
|
51
|
+
psi_features_params: StabilityParams = DEFAULT_FEATURES_PARAMS,
|
52
|
+
psi_target_params: StabilityParams = DEFAULT_TARGET_PARAMS,
|
53
|
+
) -> Dict[str, float]:
|
54
|
+
sparse_features = df.columns[df.isna().sum() > 0].to_list()
|
55
|
+
if len(sparse_features) > 0:
|
56
|
+
logger.info(f"Calculating sparsity stability for {len(sparse_features)} sparse features")
|
57
|
+
sparse_df = df[sparse_features].notna()
|
58
|
+
sparse_df[date_column] = df[date_column]
|
59
|
+
return calculate_features_psi(
|
60
|
+
sparse_df,
|
61
|
+
cat_features,
|
62
|
+
date_column,
|
63
|
+
logger,
|
64
|
+
model_task_type,
|
65
|
+
psi_target_params,
|
66
|
+
psi_features_params,
|
67
|
+
)
|
68
|
+
return {}
|
69
|
+
|
70
|
+
|
45
71
|
def calculate_features_psi(
|
46
72
|
df: pd.DataFrame,
|
47
73
|
cat_features: list[str],
|
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = "1.2.113a4"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|