upgini 1.1.258__tar.gz → 1.1.260a1__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.258/src/upgini.egg-info → upgini-1.1.260a1}/PKG-INFO +1 -1
- {upgini-1.1.258 → upgini-1.1.260a1}/setup.py +1 -1
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/dataset.py +2 -2
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/features_enricher.py +13 -9
- {upgini-1.1.258 → upgini-1.1.260a1/src/upgini.egg-info}/PKG-INFO +1 -1
- {upgini-1.1.258 → upgini-1.1.260a1}/LICENSE +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/README.md +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/pyproject.toml +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/setup.cfg +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/ads.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/errors.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/fingerprint.js +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/http.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/metadata.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/metrics.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/search_task.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/spinner.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini/version_validator.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini.egg-info/SOURCES.txt +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini.egg-info/dependency_links.txt +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini.egg-info/requires.txt +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/src/upgini.egg-info/top_level.txt +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_binary_dataset.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_blocked_time_series.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_categorical_dataset.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_continuous_dataset.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_country_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_custom_loss_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_datetime_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_email_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_etalon_validation.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_features_enricher.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_metrics.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_phone_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_postal_code_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_target_utils.py +0 -0
- {upgini-1.1.258 → upgini-1.1.260a1}/tests/test_widget.py +0 -0
|
@@ -223,11 +223,11 @@ class Dataset: # (pd.DataFrame):
|
|
|
223
223
|
self.data[col] = self.data[col].astype("str").str.slice(stop=self.MAX_STRING_FEATURE_LENGTH)
|
|
224
224
|
|
|
225
225
|
def __convert_bools(self):
|
|
226
|
-
"""Convert bool columns
|
|
226
|
+
"""Convert bool columns to string"""
|
|
227
227
|
# self.logger.info("Converting bool to int")
|
|
228
228
|
for col in self.data.columns:
|
|
229
229
|
if is_bool(self.data[col]):
|
|
230
|
-
self.data[col] = self.data[col].astype("
|
|
230
|
+
self.data[col] = self.data[col].astype("str")
|
|
231
231
|
|
|
232
232
|
def __convert_float16(self):
|
|
233
233
|
"""Convert float16 to float"""
|
|
@@ -1681,9 +1681,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1681
1681
|
eval_set_sampled_dict[idx] = (eval_x_sampled, enriched_eval_x, eval_y_sampled)
|
|
1682
1682
|
else:
|
|
1683
1683
|
self.logger.info("Transform without eval_set")
|
|
1684
|
-
df =
|
|
1684
|
+
df = validated_X.copy()
|
|
1685
1685
|
|
|
1686
1686
|
df[TARGET] = validated_y
|
|
1687
|
+
|
|
1688
|
+
df = clean_full_duplicates(df, logger=self.logger, silent=True, bundle=self.bundle)
|
|
1689
|
+
|
|
1687
1690
|
num_samples = _num_samples(df)
|
|
1688
1691
|
if num_samples > Dataset.FIT_SAMPLE_THRESHOLD:
|
|
1689
1692
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_ROWS}")
|
|
@@ -2884,19 +2887,20 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2884
2887
|
sort_columns = [date_column] if date_column is not None else []
|
|
2885
2888
|
|
|
2886
2889
|
other_search_keys = sorted(
|
|
2887
|
-
[
|
|
2888
|
-
|
|
2889
|
-
|
|
2890
|
-
|
|
2891
|
-
|
|
2892
|
-
|
|
2893
|
-
]
|
|
2890
|
+
[c for c in df.columns if c not in sort_columns and df[c].nunique() > 1]
|
|
2891
|
+
# [
|
|
2892
|
+
# sk
|
|
2893
|
+
# for sk, key_type in search_keys.items()
|
|
2894
|
+
# if key_type not in [SearchKey.DATE, SearchKey.DATETIME]
|
|
2895
|
+
# and sk in df.columns
|
|
2896
|
+
# and df[sk].nunique() > 1 # don't use constant keys for hash
|
|
2897
|
+
# ]
|
|
2894
2898
|
)
|
|
2895
2899
|
|
|
2896
2900
|
search_keys_hash = "search_keys_hash"
|
|
2897
2901
|
if len(other_search_keys) > 0:
|
|
2898
2902
|
sort_columns.append(search_keys_hash)
|
|
2899
|
-
df[search_keys_hash] = pd.util.hash_pandas_object(df[
|
|
2903
|
+
df[search_keys_hash] = pd.util.hash_pandas_object(df[other_search_keys], index=False)
|
|
2900
2904
|
|
|
2901
2905
|
df = df.sort_values(by=sort_columns)
|
|
2902
2906
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|