upgini 1.1.306__tar.gz → 1.1.308__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.306 → upgini-1.1.308}/PKG-INFO +1 -1
- upgini-1.1.308/src/upgini/__about__.py +1 -0
- upgini-1.1.308/src/upgini/__init__.py +11 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/features_enricher.py +17 -38
- upgini-1.1.306/src/upgini/__about__.py +0 -1
- upgini-1.1.306/src/upgini/__init__.py +0 -7
- {upgini-1.1.306 → upgini-1.1.308}/.gitignore +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/LICENSE +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/README.md +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/pyproject.toml +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/ads.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/date.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/dataset.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/errors.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/http.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/lazy_import.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/metadata.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/metrics.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/search_task.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/spinner.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.306 → upgini-1.1.308}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.308"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from .lazy_import import LazyImport
|
|
4
|
+
|
|
5
|
+
os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
|
|
6
|
+
|
|
7
|
+
FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
|
|
8
|
+
SearchKey = LazyImport("upgini.metadata", "SearchKey")
|
|
9
|
+
RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
|
|
10
|
+
CVType = LazyImport("upgini.metadata", "CVType")
|
|
11
|
+
ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
|
|
@@ -90,7 +90,6 @@ from upgini.utils.display_utils import (
|
|
|
90
90
|
from upgini.utils.email_utils import EmailSearchKeyConverter, EmailSearchKeyDetector
|
|
91
91
|
from upgini.utils.features_validator import FeaturesValidator
|
|
92
92
|
from upgini.utils.format import Format
|
|
93
|
-
from upgini.utils.ip_utils import IpToCountrySearchKeyConverter
|
|
94
93
|
from upgini.utils.phone_utils import PhoneSearchKeyDetector
|
|
95
94
|
from upgini.utils.postal_code_utils import PostalCodeSearchKeyDetector
|
|
96
95
|
|
|
@@ -1213,13 +1212,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1213
1212
|
converter = EmailSearchKeyConverter(email_column, hem_column, search_keys, [], self.logger)
|
|
1214
1213
|
extended_X = converter.convert(extended_X)
|
|
1215
1214
|
generated_features.extend(converter.generated_features)
|
|
1216
|
-
if (
|
|
1217
|
-
self.detect_missing_search_keys
|
|
1218
|
-
and list(search_keys.values()) == [SearchKey.DATE]
|
|
1219
|
-
and self.country_code is None
|
|
1220
|
-
):
|
|
1221
|
-
converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
|
|
1222
|
-
extended_X = converter.convert(extended_X)
|
|
1223
1215
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1224
1216
|
|
|
1225
1217
|
return extended_X, search_keys
|
|
@@ -1987,13 +1979,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1987
1979
|
df = converter.convert(df)
|
|
1988
1980
|
generated_features.extend(converter.generated_features)
|
|
1989
1981
|
email_converted_to_hem = converter.email_converted_to_hem
|
|
1990
|
-
if (
|
|
1991
|
-
self.detect_missing_search_keys
|
|
1992
|
-
and list(search_keys.values()) == [SearchKey.DATE]
|
|
1993
|
-
and self.country_code is None
|
|
1994
|
-
):
|
|
1995
|
-
converter = IpToCountrySearchKeyConverter(search_keys, self.logger)
|
|
1996
|
-
df = converter.convert(df)
|
|
1997
1982
|
generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
1998
1983
|
|
|
1999
1984
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
@@ -2300,8 +2285,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2300
2285
|
self.fit_search_keys = self.search_keys.copy()
|
|
2301
2286
|
self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
|
|
2302
2287
|
|
|
2303
|
-
validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2304
|
-
|
|
2305
2288
|
maybe_date_column = self._get_date_column(self.fit_search_keys)
|
|
2306
2289
|
has_date = maybe_date_column is not None
|
|
2307
2290
|
model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
@@ -2322,9 +2305,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2322
2305
|
|
|
2323
2306
|
df = self.__handle_index_search_keys(df, self.fit_search_keys)
|
|
2324
2307
|
|
|
2325
|
-
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2326
|
-
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
2327
|
-
|
|
2328
2308
|
if DEFAULT_INDEX in df.columns:
|
|
2329
2309
|
msg = self.bundle.get("unsupported_index_column")
|
|
2330
2310
|
self.logger.info(msg)
|
|
@@ -2334,33 +2314,32 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2334
2314
|
|
|
2335
2315
|
df = self.__add_country_code(df, self.fit_search_keys)
|
|
2336
2316
|
|
|
2337
|
-
df = remove_fintech_duplicates(
|
|
2338
|
-
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
2339
|
-
)
|
|
2340
|
-
df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
|
|
2341
|
-
|
|
2342
|
-
date_column = self._get_date_column(self.fit_search_keys)
|
|
2343
|
-
self.__adjust_cv(df, date_column, model_task_type)
|
|
2344
|
-
|
|
2345
2317
|
self.fit_generated_features = []
|
|
2346
2318
|
|
|
2347
|
-
if date_column is not None:
|
|
2348
|
-
converter = DateTimeSearchKeyConverter(date_column, self.date_format, self.logger, bundle=self.bundle)
|
|
2319
|
+
if has_date:
|
|
2320
|
+
converter = DateTimeSearchKeyConverter(maybe_date_column, self.date_format, self.logger, bundle=self.bundle)
|
|
2349
2321
|
df = converter.convert(df, keep_time=True)
|
|
2350
|
-
self.logger.info(f"Date column after convertion: {df[date_column]}")
|
|
2322
|
+
self.logger.info(f"Date column after convertion: {df[maybe_date_column]}")
|
|
2351
2323
|
self.fit_generated_features.extend(converter.generated_features)
|
|
2352
2324
|
else:
|
|
2353
2325
|
self.logger.info("Input dataset hasn't date column")
|
|
2354
2326
|
if self.add_date_if_missing:
|
|
2355
2327
|
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2356
2328
|
|
|
2357
|
-
|
|
2358
|
-
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
|
|
2362
|
-
|
|
2363
|
-
|
|
2329
|
+
# Checks that need validated date
|
|
2330
|
+
validate_dates_distribution(df, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2331
|
+
|
|
2332
|
+
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2333
|
+
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
2334
|
+
|
|
2335
|
+
self.__adjust_cv(df, maybe_date_column, model_task_type)
|
|
2336
|
+
|
|
2337
|
+
# TODO normalize and convert all columns
|
|
2338
|
+
|
|
2339
|
+
df = remove_fintech_duplicates(
|
|
2340
|
+
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
2341
|
+
)
|
|
2342
|
+
df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
|
|
2364
2343
|
|
|
2365
2344
|
# Explode multiple search keys
|
|
2366
2345
|
non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX] + list(self.fit_search_keys.keys())
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.306"
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
from .lazy_import import LazyImport
|
|
2
|
-
|
|
3
|
-
FeaturesEnricher = LazyImport('upgini.features_enricher', 'FeaturesEnricher')
|
|
4
|
-
SearchKey = LazyImport('upgini.metadata', 'SearchKey')
|
|
5
|
-
RuntimeParameters = LazyImport('upgini.metadata', 'RuntimeParameters')
|
|
6
|
-
CVType = LazyImport('upgini.metadata', 'CVType')
|
|
7
|
-
ModelTaskType = LazyImport('upgini.metadata', 'ModelTaskType')
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|