upgini 1.1.244a16__tar.gz → 1.1.244a18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.244a16/src/upgini.egg-info → upgini-1.1.244a18}/PKG-INFO +1 -1
- {upgini-1.1.244a16 → upgini-1.1.244a18}/setup.py +1 -1
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/features_enricher.py +3 -1
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/metrics.py +2 -2
- {upgini-1.1.244a16 → upgini-1.1.244a18/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.244a16 → upgini-1.1.244a18}/LICENSE +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/README.md +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/pyproject.toml +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/setup.cfg +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/ads.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/dataset.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/errors.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/http.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/metadata.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/search_task.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/spinner.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_country_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_email_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_metrics.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.244a16 → upgini-1.1.244a18}/tests/test_widget.py +0 -0
|
@@ -1337,7 +1337,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1337
1337
|
|
|
1338
1338
|
# Detect and drop high cardinality columns in train
|
|
1339
1339
|
columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
|
|
1340
|
-
columns_with_high_cardinality = [
|
|
1340
|
+
columns_with_high_cardinality = [
|
|
1341
|
+
c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
|
|
1342
|
+
]
|
|
1341
1343
|
self.logger.info(
|
|
1342
1344
|
f"Columns {columns_with_high_cardinality} will be dropped for metrics calculation due to high cardinality"
|
|
1343
1345
|
)
|
|
@@ -416,11 +416,12 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
416
416
|
if len(self.emb_features) > 3: # There is no reason to reduce embeddings dimension with less than 4
|
|
417
417
|
X, embedding_features = self.group_embeddings(X)
|
|
418
418
|
params["embedding_features"] = embedding_features
|
|
419
|
+
else:
|
|
420
|
+
self.emb_features = []
|
|
419
421
|
|
|
420
422
|
# Find text features from passed in generate_features
|
|
421
423
|
if self.text_features is not None:
|
|
422
424
|
self.text_features = [f for f in self.text_features if not is_numeric_dtype(X[f])]
|
|
423
|
-
|
|
424
425
|
params["text_features"] = self.text_features
|
|
425
426
|
|
|
426
427
|
# Find rest categorical features
|
|
@@ -464,7 +465,6 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
464
465
|
df = df.copy()
|
|
465
466
|
df[self.emb_features] = df[self.emb_features].fillna(0.0)
|
|
466
467
|
df[emb_name] = df[self.emb_features].values.tolist()
|
|
467
|
-
# TODO drop by condition
|
|
468
468
|
df = df.drop(columns=self.emb_features)
|
|
469
469
|
|
|
470
470
|
return df, [emb_name]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|