upgini 1.2.40__tar.gz → 1.2.41a3758.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/PKG-INFO +1 -1
- upgini-1.2.41a3758.dev1/src/upgini/__about__.py +1 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/features_enricher.py +24 -11
- upgini-1.2.40/src/upgini/__about__.py +0 -1
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/.gitignore +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/LICENSE +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/README.md +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/pyproject.toml +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/ads.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/dataset.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/errors.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/http.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/metadata.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/metrics.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.41a3758.dev1"
|
|
@@ -165,10 +165,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
165
165
|
|
|
166
166
|
shared_datasets: list of str, optional (default=None)
|
|
167
167
|
List of private shared dataset ids for custom search
|
|
168
|
-
|
|
169
|
-
select_features: bool, optional (default=False)
|
|
170
|
-
If True, return only selected features both from input and data sources.
|
|
171
|
-
Otherwise, return all features from input and only selected features from data sources.
|
|
172
168
|
"""
|
|
173
169
|
|
|
174
170
|
TARGET_NAME = "target"
|
|
@@ -235,7 +231,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
235
231
|
client_visitorid: Optional[str] = None,
|
|
236
232
|
custom_bundle_config: Optional[str] = None,
|
|
237
233
|
add_date_if_missing: bool = True,
|
|
238
|
-
select_features: bool = False,
|
|
239
234
|
disable_force_downsampling: bool = False,
|
|
240
235
|
id_columns: Optional[List[str]] = None,
|
|
241
236
|
**kwargs,
|
|
@@ -297,7 +292,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
297
292
|
self.dropped_client_feature_names_ = []
|
|
298
293
|
self.feature_importances_ = []
|
|
299
294
|
self.search_id = search_id
|
|
300
|
-
self.select_features = select_features
|
|
301
295
|
self.disable_force_downsampling = disable_force_downsampling
|
|
302
296
|
|
|
303
297
|
if search_id:
|
|
@@ -405,6 +399,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
405
399
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
406
400
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
407
401
|
search_id_callback: Optional[Callable[[str], Any]] = None,
|
|
402
|
+
select_features: bool = False,
|
|
408
403
|
**kwargs,
|
|
409
404
|
):
|
|
410
405
|
"""Fit to data.
|
|
@@ -440,6 +435,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
440
435
|
|
|
441
436
|
remove_outliers_calc_metrics, optional (default=True)
|
|
442
437
|
If True then rows with target outliers will be dropped on metrics calculation
|
|
438
|
+
|
|
439
|
+
select_features: bool, optional (default=False)
|
|
440
|
+
If True, return only selected features both from input and data sources.
|
|
441
|
+
Otherwise, return all features from input and only selected features from data sources.
|
|
443
442
|
"""
|
|
444
443
|
trace_id = str(uuid.uuid4())
|
|
445
444
|
start_time = time.time()
|
|
@@ -474,6 +473,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
474
473
|
self.y = y
|
|
475
474
|
self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
|
|
476
475
|
self.dump_input(trace_id, X, y, self.eval_set)
|
|
476
|
+
self.__set_select_features(select_features)
|
|
477
477
|
self.__inner_fit(
|
|
478
478
|
trace_id,
|
|
479
479
|
X,
|
|
@@ -523,6 +523,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
523
523
|
finally:
|
|
524
524
|
self.logger.info(f"Fit elapsed time: {time.time() - start_time}")
|
|
525
525
|
|
|
526
|
+
def __set_select_features(self, select_features: bool):
|
|
527
|
+
self.fit_select_features = select_features
|
|
528
|
+
self.runtime_parameters.properties["select_features"] = select_features
|
|
529
|
+
|
|
526
530
|
def fit_transform(
|
|
527
531
|
self,
|
|
528
532
|
X: Union[pd.DataFrame, pd.Series, np.ndarray],
|
|
@@ -538,6 +542,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
538
542
|
estimator: Optional[Any] = None,
|
|
539
543
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
540
544
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
545
|
+
select_features: bool = False,
|
|
541
546
|
**kwargs,
|
|
542
547
|
) -> pd.DataFrame:
|
|
543
548
|
"""Fit to data, then transform it.
|
|
@@ -578,6 +583,10 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
578
583
|
remove_outliers_calc_metrics, optional (default=True)
|
|
579
584
|
If True then rows with target outliers will be dropped on metrics calculation
|
|
580
585
|
|
|
586
|
+
select_features: bool, optional (default=False)
|
|
587
|
+
If True, return only selected features both from input and data sources.
|
|
588
|
+
Otherwise, return all features from input and only selected features from data sources.
|
|
589
|
+
|
|
581
590
|
Returns
|
|
582
591
|
-------
|
|
583
592
|
X_new: pandas.DataFrame of shape (n_samples, n_features_new)
|
|
@@ -612,6 +621,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
612
621
|
self.X = X
|
|
613
622
|
self.y = y
|
|
614
623
|
self.eval_set = self._check_eval_set(eval_set, X, self.bundle)
|
|
624
|
+
self.__set_select_features(select_features)
|
|
615
625
|
self.dump_input(trace_id, X, y, self.eval_set)
|
|
616
626
|
|
|
617
627
|
if _num_samples(drop_duplicates(X)) > Dataset.MAX_ROWS:
|
|
@@ -1231,8 +1241,11 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1231
1241
|
self.logger.info(f"Calculating metrics elapsed time: {time.time() - start_time}")
|
|
1232
1242
|
|
|
1233
1243
|
def _update_shap_values(self, trace_id: str, x_columns: List[str], new_shaps: Dict[str, float]):
|
|
1244
|
+
renaming = self.fit_columns_renaming or {}
|
|
1234
1245
|
new_shaps = {
|
|
1235
|
-
feature: _round_shap_value(shap)
|
|
1246
|
+
renaming.get(feature, feature): _round_shap_value(shap)
|
|
1247
|
+
for feature, shap in new_shaps.items()
|
|
1248
|
+
if feature in self.feature_names_ or renaming.get(feature, feature) in self.feature_names_
|
|
1236
1249
|
}
|
|
1237
1250
|
self.__prepare_feature_importances(trace_id, x_columns, new_shaps, silent=True)
|
|
1238
1251
|
|
|
@@ -1461,7 +1474,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1461
1474
|
c
|
|
1462
1475
|
for c in X_sampled.columns.to_list()
|
|
1463
1476
|
if (
|
|
1464
|
-
not self.select_features
|
|
1477
|
+
not self.fit_select_features
|
|
1465
1478
|
or c in self.feature_names_
|
|
1466
1479
|
or (self.fit_columns_renaming is not None and self.fit_columns_renaming.get(c) in self.feature_names_)
|
|
1467
1480
|
)
|
|
@@ -3287,8 +3300,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3287
3300
|
f"Client ip: {self.client_ip}\n"
|
|
3288
3301
|
f"Client visitorId: {self.client_visitorid}\n"
|
|
3289
3302
|
f"Add date if missing: {self.add_date_if_missing}\n"
|
|
3290
|
-
f"Select features: {self.select_features}\n"
|
|
3291
3303
|
f"Disable force downsampling: {self.disable_force_downsampling}\n"
|
|
3304
|
+
f"Id columns: {self.id_columns}\n"
|
|
3292
3305
|
)
|
|
3293
3306
|
|
|
3294
3307
|
def sample(df):
|
|
@@ -3675,7 +3688,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3675
3688
|
is_client_feature = feature_meta.name in x_columns
|
|
3676
3689
|
|
|
3677
3690
|
if feature_meta.shap_value == 0.0:
|
|
3678
|
-
if self.select_features:
|
|
3691
|
+
if self.fit_select_features:
|
|
3679
3692
|
self.dropped_client_feature_names_.append(feature_meta.name)
|
|
3680
3693
|
continue
|
|
3681
3694
|
|
|
@@ -3684,7 +3697,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3684
3697
|
feature_meta.name in self.fit_generated_features
|
|
3685
3698
|
or feature_meta.name == COUNTRY
|
|
3686
3699
|
# In select_features mode we select also from etalon features and need to show them
|
|
3687
|
-
or (not self.select_features and is_client_feature)
|
|
3700
|
+
or (not self.fit_select_features and is_client_feature)
|
|
3688
3701
|
):
|
|
3689
3702
|
continue
|
|
3690
3703
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.40"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.40 → upgini-1.2.41a3758.dev1}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|