upgini 1.2.29a3__tar.gz → 1.2.29a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.29a3 → upgini-1.2.29a4}/PKG-INFO +1 -1
- upgini-1.2.29a4/src/upgini/__about__.py +1 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/features_enricher.py +11 -8
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/metrics.py +33 -9
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/datetime_utils.py +2 -2
- upgini-1.2.29a3/src/upgini/__about__.py +0 -1
- {upgini-1.2.29a3 → upgini-1.2.29a4}/.gitignore +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/LICENSE +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/README.md +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/pyproject.toml +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/ads.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/dataset.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/errors.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/http.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/metadata.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/search_task.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/spinner.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.29a3 → upgini-1.2.29a4}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.29a4"
|
|
@@ -999,9 +999,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
999
999
|
text_features=self.generate_features,
|
|
1000
1000
|
has_date=has_date,
|
|
1001
1001
|
)
|
|
1002
|
-
|
|
1002
|
+
etalon_cv_result = baseline_estimator.cross_val_predict(
|
|
1003
1003
|
fitting_X, y_sorted, self.baseline_score_column
|
|
1004
1004
|
)
|
|
1005
|
+
etalon_metric = etalon_cv_result.get_display_metric()
|
|
1005
1006
|
if etalon_metric is None:
|
|
1006
1007
|
self.logger.info(
|
|
1007
1008
|
f"Baseline {metric} on train client features is None (maybe all features was removed)"
|
|
@@ -1033,9 +1034,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1033
1034
|
text_features=self.generate_features,
|
|
1034
1035
|
has_date=has_date,
|
|
1035
1036
|
)
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1037
|
+
enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
|
1038
|
+
enriched_metric = enriched_cv_result.get_display_metric()
|
|
1039
|
+
enriched_shaps = enriched_cv_result.shap_values
|
|
1039
1040
|
|
|
1040
1041
|
if enriched_shaps is not None:
|
|
1041
1042
|
self._update_shap_values(enriched_shaps)
|
|
@@ -1048,7 +1049,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1048
1049
|
else:
|
|
1049
1050
|
self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
|
|
1050
1051
|
if etalon_metric is not None and enriched_metric is not None:
|
|
1051
|
-
uplift = (
|
|
1052
|
+
uplift = (enriched_cv_result.metric - etalon_cv_result.metric) * multiplier
|
|
1052
1053
|
|
|
1053
1054
|
train_metrics = {
|
|
1054
1055
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
|
@@ -1091,9 +1092,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1091
1092
|
f"Calculate baseline {metric} on eval set {idx + 1} "
|
|
1092
1093
|
f"on client features: {eval_X_sorted.columns.to_list()}"
|
|
1093
1094
|
)
|
|
1094
|
-
|
|
1095
|
+
etalon_eval_results = baseline_estimator.calculate_metric(
|
|
1095
1096
|
eval_X_sorted, eval_y_sorted, self.baseline_score_column
|
|
1096
1097
|
)
|
|
1098
|
+
etalon_eval_metric = etalon_eval_results.get_display_metric()
|
|
1097
1099
|
self.logger.info(
|
|
1098
1100
|
f"Baseline {metric} on eval set {idx + 1} client features: {etalon_eval_metric}"
|
|
1099
1101
|
)
|
|
@@ -1105,9 +1107,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1105
1107
|
f"Calculate enriched {metric} on eval set {idx + 1} "
|
|
1106
1108
|
f"on combined features: {enriched_eval_X_sorted.columns.to_list()}"
|
|
1107
1109
|
)
|
|
1108
|
-
|
|
1110
|
+
enriched_eval_results = enriched_estimator.calculate_metric(
|
|
1109
1111
|
enriched_eval_X_sorted, enriched_eval_y_sorted
|
|
1110
1112
|
)
|
|
1113
|
+
enriched_eval_metric = enriched_eval_results.get_display_metric()
|
|
1111
1114
|
self.logger.info(
|
|
1112
1115
|
f"Enriched {metric} on eval set {idx + 1} combined features: {enriched_eval_metric}"
|
|
1113
1116
|
)
|
|
@@ -1115,7 +1118,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1115
1118
|
enriched_eval_metric = None
|
|
1116
1119
|
|
|
1117
1120
|
if etalon_eval_metric is not None and enriched_eval_metric is not None:
|
|
1118
|
-
eval_uplift = (
|
|
1121
|
+
eval_uplift = (enriched_eval_results.metric - etalon_eval_results.metric) * multiplier
|
|
1119
1122
|
else:
|
|
1120
1123
|
eval_uplift = None
|
|
1121
1124
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from dataclasses import dataclass
|
|
3
4
|
import inspect
|
|
4
5
|
import logging
|
|
5
6
|
import re
|
|
@@ -210,6 +211,21 @@ SUPPORTED_CATBOOST_METRICS = {
|
|
|
210
211
|
}
|
|
211
212
|
|
|
212
213
|
|
|
214
|
+
@dataclass
|
|
215
|
+
class _CrossValResults:
|
|
216
|
+
metric: Optional[float]
|
|
217
|
+
metric_std: Optional[float]
|
|
218
|
+
shap_values: Optional[Dict[str, float]]
|
|
219
|
+
|
|
220
|
+
def get_display_metric(self) -> Optional[str]:
|
|
221
|
+
if self.metric is None:
|
|
222
|
+
return None
|
|
223
|
+
elif self.metric_std is None:
|
|
224
|
+
return f"{self.metric:.3f}"
|
|
225
|
+
else:
|
|
226
|
+
return f"{self.metric:.3f} ± {self.metric_std:.3f}"
|
|
227
|
+
|
|
228
|
+
|
|
213
229
|
class EstimatorWrapper:
|
|
214
230
|
def __init__(
|
|
215
231
|
self,
|
|
@@ -297,11 +313,11 @@ class EstimatorWrapper:
|
|
|
297
313
|
|
|
298
314
|
def cross_val_predict(
|
|
299
315
|
self, x: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
|
|
300
|
-
) ->
|
|
316
|
+
) -> _CrossValResults:
|
|
301
317
|
x, y, groups, fit_params = self._prepare_to_fit(x, y)
|
|
302
318
|
|
|
303
319
|
if x.shape[1] == 0:
|
|
304
|
-
return None
|
|
320
|
+
return _CrossValResults(metric=None, metric_std=None, shap_values=None)
|
|
305
321
|
|
|
306
322
|
scorer = check_scoring(self.estimator, scoring=self.scorer)
|
|
307
323
|
|
|
@@ -326,7 +342,7 @@ class EstimatorWrapper:
|
|
|
326
342
|
|
|
327
343
|
self.check_fold_metrics(metrics_by_fold)
|
|
328
344
|
|
|
329
|
-
metric =
|
|
345
|
+
metric, metric_std = self._calculate_metric_from_folds(metrics_by_fold)
|
|
330
346
|
|
|
331
347
|
splits = self.cv.split(x, y, groups)
|
|
332
348
|
|
|
@@ -351,7 +367,7 @@ class EstimatorWrapper:
|
|
|
351
367
|
else:
|
|
352
368
|
average_shap_values = None
|
|
353
369
|
|
|
354
|
-
return
|
|
370
|
+
return _CrossValResults(metric=metric, metric_std=metric_std, shap_values=average_shap_values)
|
|
355
371
|
|
|
356
372
|
def process_shap_values(self, shap_values: Dict[str, float]) -> Dict[str, float]:
|
|
357
373
|
return shap_values
|
|
@@ -367,17 +383,25 @@ class EstimatorWrapper:
|
|
|
367
383
|
metric = 2 * metric - 1
|
|
368
384
|
return metric
|
|
369
385
|
|
|
370
|
-
def calculate_metric(
|
|
386
|
+
def calculate_metric(
|
|
387
|
+
self, x: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
|
|
388
|
+
) -> _CrossValResults:
|
|
371
389
|
x, y, _ = self._prepare_to_calculate(x, y)
|
|
372
390
|
if baseline_score_column is not None and self.metric_name == "GINI":
|
|
373
|
-
metric = roc_auc_score(y, x[baseline_score_column])
|
|
391
|
+
metric, metric_std = roc_auc_score(y, x[baseline_score_column]), None
|
|
374
392
|
else:
|
|
375
393
|
metrics = []
|
|
376
394
|
for est in self.cv_estimators:
|
|
377
395
|
metrics.append(self.scorer(est, x, y))
|
|
378
396
|
|
|
379
|
-
metric =
|
|
380
|
-
return
|
|
397
|
+
metric, metric_std = self._calculate_metric_from_folds(metrics)
|
|
398
|
+
return _CrossValResults(metric=metric, metric_std=metric_std, shap_values=None)
|
|
399
|
+
|
|
400
|
+
def _calculate_metric_from_folds(self, metrics_by_fold: List[float]) -> Tuple[float, float]:
|
|
401
|
+
metrics_by_fold = [self.post_process_metric(m) for m in metrics_by_fold]
|
|
402
|
+
metric = np.mean(metrics_by_fold) * self.multiplier
|
|
403
|
+
metric_std = np.std(metrics_by_fold) * np.abs(self.multiplier)
|
|
404
|
+
return metric, metric_std
|
|
381
405
|
|
|
382
406
|
@staticmethod
|
|
383
407
|
def create(
|
|
@@ -591,7 +615,7 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
591
615
|
|
|
592
616
|
def cross_val_predict(
|
|
593
617
|
self, x: pd.DataFrame, y: np.ndarray, baseline_score_column: Optional[Any] = None
|
|
594
|
-
) ->
|
|
618
|
+
) -> _CrossValResults:
|
|
595
619
|
try:
|
|
596
620
|
return super().cross_val_predict(x, y, baseline_score_column)
|
|
597
621
|
except Exception as e:
|
|
@@ -111,7 +111,7 @@ class DateTimeSearchKeyConverter:
|
|
|
111
111
|
|
|
112
112
|
# Define function to apply sine and cosine transformations
|
|
113
113
|
def add_cyclical_features(df, column, period):
|
|
114
|
-
period_suffix = f"_{period}" if column !=
|
|
114
|
+
period_suffix = f"_{period}" if column != "day_in_quarter" else ""
|
|
115
115
|
sin_feature = f"datetime_{column}_sin{period_suffix}"
|
|
116
116
|
cos_feature = f"datetime_{column}_cos{period_suffix}"
|
|
117
117
|
df[sin_feature] = np.sin(2 * np.pi * df[column] / period)
|
|
@@ -138,7 +138,7 @@ class DateTimeSearchKeyConverter:
|
|
|
138
138
|
|
|
139
139
|
end = pd.to_datetime({"year": quarter_end_year, "month": quarter_end_month, "day": 1})
|
|
140
140
|
|
|
141
|
-
df["days_in_quarter"] = (end - start).dt.days
|
|
141
|
+
df["days_in_quarter"] = (end.reset_index(drop=True) - start.reset_index(drop=True)).dt.days
|
|
142
142
|
|
|
143
143
|
add_cyclical_features(df, "day_in_quarter", df["days_in_quarter"]) # Days in the quarter
|
|
144
144
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.29a3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|