upgini 1.1.306__py3-none-any.whl → 1.1.307__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/__init__.py +9 -5
- upgini/features_enricher.py +18 -16
- {upgini-1.1.306.dist-info → upgini-1.1.307.dist-info}/METADATA +1 -1
- {upgini-1.1.306.dist-info → upgini-1.1.307.dist-info}/RECORD +7 -7
- {upgini-1.1.306.dist-info → upgini-1.1.307.dist-info}/WHEEL +0 -0
- {upgini-1.1.306.dist-info → upgini-1.1.307.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.306"
|
|
1
|
+
__version__ = "1.1.307"
|
upgini/__init__.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from .lazy_import import LazyImport
|
|
2
4
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
os.environ["SETUPTOOLS_USE_DISTUTILS"] = "stdlib"
|
|
6
|
+
|
|
7
|
+
FeaturesEnricher = LazyImport("upgini.features_enricher", "FeaturesEnricher")
|
|
8
|
+
SearchKey = LazyImport("upgini.metadata", "SearchKey")
|
|
9
|
+
RuntimeParameters = LazyImport("upgini.metadata", "RuntimeParameters")
|
|
10
|
+
CVType = LazyImport("upgini.metadata", "CVType")
|
|
11
|
+
ModelTaskType = LazyImport("upgini.metadata", "ModelTaskType")
|
upgini/features_enricher.py
CHANGED
|
@@ -2300,8 +2300,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2300
2300
|
self.fit_search_keys = self.search_keys.copy()
|
|
2301
2301
|
self.fit_search_keys = self.__prepare_search_keys(validated_X, self.fit_search_keys, is_demo_dataset)
|
|
2302
2302
|
|
|
2303
|
-
validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2304
|
-
|
|
2305
2303
|
maybe_date_column = self._get_date_column(self.fit_search_keys)
|
|
2306
2304
|
has_date = maybe_date_column is not None
|
|
2307
2305
|
model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
|
|
@@ -2322,9 +2320,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2322
2320
|
|
|
2323
2321
|
df = self.__handle_index_search_keys(df, self.fit_search_keys)
|
|
2324
2322
|
|
|
2325
|
-
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2326
|
-
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
2327
|
-
|
|
2328
2323
|
if DEFAULT_INDEX in df.columns:
|
|
2329
2324
|
msg = self.bundle.get("unsupported_index_column")
|
|
2330
2325
|
self.logger.info(msg)
|
|
@@ -2334,26 +2329,33 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2334
2329
|
|
|
2335
2330
|
df = self.__add_country_code(df, self.fit_search_keys)
|
|
2336
2331
|
|
|
2337
|
-
df = remove_fintech_duplicates(
|
|
2338
|
-
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
2339
|
-
)
|
|
2340
|
-
df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
|
|
2341
|
-
|
|
2342
|
-
date_column = self._get_date_column(self.fit_search_keys)
|
|
2343
|
-
self.__adjust_cv(df, date_column, model_task_type)
|
|
2344
|
-
|
|
2345
2332
|
self.fit_generated_features = []
|
|
2346
2333
|
|
|
2347
|
-
if
|
|
2348
|
-
converter = DateTimeSearchKeyConverter(
|
|
2334
|
+
if has_date:
|
|
2335
|
+
converter = DateTimeSearchKeyConverter(maybe_date_column, self.date_format, self.logger, bundle=self.bundle)
|
|
2349
2336
|
df = converter.convert(df, keep_time=True)
|
|
2350
|
-
self.logger.info(f"Date column after convertion: {df[date_column]}")
|
|
2337
|
+
self.logger.info(f"Date column after convertion: {df[maybe_date_column]}")
|
|
2351
2338
|
self.fit_generated_features.extend(converter.generated_features)
|
|
2352
2339
|
else:
|
|
2353
2340
|
self.logger.info("Input dataset hasn't date column")
|
|
2354
2341
|
if self.add_date_if_missing:
|
|
2355
2342
|
df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
|
|
2356
2343
|
|
|
2344
|
+
# Checks that need validated date
|
|
2345
|
+
validate_dates_distribution(validated_X, self.fit_search_keys, self.logger, self.bundle, self.warning_counter)
|
|
2346
|
+
|
|
2347
|
+
if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
|
|
2348
|
+
self._validate_PSI(df.sort_values(by=maybe_date_column))
|
|
2349
|
+
|
|
2350
|
+
self.__adjust_cv(df, maybe_date_column, model_task_type)
|
|
2351
|
+
|
|
2352
|
+
# TODO normalize and convert all columns
|
|
2353
|
+
|
|
2354
|
+
df = remove_fintech_duplicates(
|
|
2355
|
+
df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
|
|
2356
|
+
)
|
|
2357
|
+
df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
|
|
2358
|
+
|
|
2357
2359
|
if (
|
|
2358
2360
|
self.detect_missing_search_keys
|
|
2359
2361
|
and list(self.fit_search_keys.values()) == [SearchKey.DATE]
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
2
|
-
upgini/__init__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=Sn0l4v1zhcKcI7IkU-4gpLRv1k03kr7gu6biIIfnoLM,24
|
|
2
|
+
upgini/__init__.py,sha256=Xs0YFVBu1KUdtZzbStGRPQtLt3YLzJnjx5nIUBlX8BE,415
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=MOzBVsvzlHLxNfPWtMaXC_jIPeW7_gUvbSGeXnsPgNI,46158
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=87gU9dwSjdqJ5ebOnDPk9gKvoLj6XZvpO5mkIbnaWNE,183601
|
|
7
7
|
upgini/http.py,sha256=a4Epc9YLIJBuYk4t8E_2-QDLBtJFqKO35jn2SnYQZCg,42920
|
|
8
8
|
upgini/lazy_import.py,sha256=EwoM0msNGbSmWBhGbrLDny1DSnOlvTxCjmMKPxYlDms,610
|
|
9
9
|
upgini/metadata.py,sha256=E5WWZ_MkjGyYNQh_LnwMIBHyqPx1fxk-qhEfQIJnzq8,10209
|
|
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
57
57
|
upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
|
|
58
58
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
59
59
|
upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
|
|
60
|
-
upgini-1.1.
|
|
61
|
-
upgini-1.1.
|
|
62
|
-
upgini-1.1.
|
|
63
|
-
upgini-1.1.
|
|
60
|
+
upgini-1.1.307.dist-info/METADATA,sha256=-HQpKKy8UA3w6J6Oi6WNYgqmaqCOWQmlVGaV7B5VBZc,48153
|
|
61
|
+
upgini-1.1.307.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
62
|
+
upgini-1.1.307.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
63
|
+
upgini-1.1.307.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|