upgini 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +1 -0
- upgini/metrics.py +45 -29
- {upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/RECORD +7 -7
- {upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/WHEEL +0 -0
- {upgini-1.2.81a3832.dev6.dist-info → upgini-1.2.81a3832.dev8.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev6"
|
1
|
+
__version__ = "1.2.81a3832.dev8"
|
upgini/features_enricher.py
CHANGED
@@ -3934,6 +3934,7 @@ if response.status_code == 200:
|
|
3934
3934
|
continue
|
3935
3935
|
|
3936
3936
|
# Use only important features
|
3937
|
+
# If select_features is False, we don't show etalon features in the report
|
3937
3938
|
if (
|
3938
3939
|
# feature_meta.name in self.fit_generated_features or
|
3939
3940
|
feature_meta.name == COUNTRY # constant synthetic column
|
upgini/metrics.py
CHANGED
@@ -18,6 +18,7 @@ from numpy import log1p
|
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
20
20
|
|
21
|
+
from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
|
21
22
|
from upgini.utils.features_validator import FeaturesValidator
|
22
23
|
from upgini.utils.sklearn_ext import cross_validate
|
23
24
|
|
@@ -31,7 +32,7 @@ except ImportError:
|
|
31
32
|
available_scorers = SCORERS
|
32
33
|
from sklearn.metrics import mean_squared_error
|
33
34
|
from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
|
34
|
-
from sklearn.model_selection import BaseCrossValidator # , TimeSeriesSplit
|
35
|
+
from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit # , TimeSeriesSplit
|
35
36
|
|
36
37
|
from upgini.errors import ValidationError
|
37
38
|
from upgini.metadata import ModelTaskType
|
@@ -250,6 +251,8 @@ class _CrossValResults:
|
|
250
251
|
|
251
252
|
|
252
253
|
class EstimatorWrapper:
|
254
|
+
default_estimator = "catboost"
|
255
|
+
|
253
256
|
def __init__(
|
254
257
|
self,
|
255
258
|
estimator,
|
@@ -352,6 +355,7 @@ class EstimatorWrapper:
|
|
352
355
|
self.logger.info("Calculate baseline GINI on passed baseline_score_column and target")
|
353
356
|
metric = roc_auc_score(y, x[baseline_score_column])
|
354
357
|
else:
|
358
|
+
self.logger.info(f"Cross validate with estimeator: {self.estimator}")
|
355
359
|
cv_results = cross_validate(
|
356
360
|
estimator=self.estimator,
|
357
361
|
x=x,
|
@@ -458,31 +462,43 @@ class EstimatorWrapper:
|
|
458
462
|
"logger": logger,
|
459
463
|
}
|
460
464
|
if estimator is None:
|
461
|
-
|
462
|
-
|
463
|
-
params =
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
465
|
+
if EstimatorWrapper.default_estimator == "catboost":
|
466
|
+
logger.info("Using CatBoost as default estimator")
|
467
|
+
params = {"has_time": has_date}
|
468
|
+
if target_type == ModelTaskType.MULTICLASS:
|
469
|
+
params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
470
|
+
params = _get_add_params(params, add_params)
|
471
|
+
estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
472
|
+
elif target_type == ModelTaskType.BINARY:
|
473
|
+
params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
|
474
|
+
params = _get_add_params(params, add_params)
|
475
|
+
estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
476
|
+
elif target_type == ModelTaskType.REGRESSION:
|
477
|
+
params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
|
478
|
+
params = _get_add_params(params, add_params)
|
479
|
+
estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
|
480
|
+
else:
|
481
|
+
raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
|
482
|
+
elif EstimatorWrapper.default_estimator == "lightgbm":
|
483
|
+
logger.info("Using LightGBM as default estimator")
|
484
|
+
params = {"random_state": DEFAULT_RANDOM_STATE, "verbose": -1}
|
485
|
+
if target_type == ModelTaskType.MULTICLASS:
|
486
|
+
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
487
|
+
params = _get_add_params(params, add_params)
|
488
|
+
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
489
|
+
elif target_type == ModelTaskType.BINARY:
|
490
|
+
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
491
|
+
params = _get_add_params(params, add_params)
|
492
|
+
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
493
|
+
elif target_type == ModelTaskType.REGRESSION:
|
494
|
+
if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
|
495
|
+
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
496
|
+
params = _get_add_params(params, add_params)
|
497
|
+
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
498
|
+
else:
|
499
|
+
raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
|
484
500
|
else:
|
485
|
-
raise Exception(
|
501
|
+
raise Exception("Unsupported default_estimator. Available: catboost, lightgbm")
|
486
502
|
else:
|
487
503
|
if hasattr(estimator, "copy"):
|
488
504
|
estimator_copy = estimator.copy()
|
@@ -490,8 +506,8 @@ class EstimatorWrapper:
|
|
490
506
|
estimator_copy = deepcopy(estimator)
|
491
507
|
kwargs["estimator"] = estimator_copy
|
492
508
|
if is_catboost_estimator(estimator):
|
493
|
-
if
|
494
|
-
estimator_copy.set_params(
|
509
|
+
if has_date is not None:
|
510
|
+
estimator_copy.set_params(has_time=has_date)
|
495
511
|
estimator = CatBoostWrapper(**kwargs)
|
496
512
|
else:
|
497
513
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
@@ -941,8 +957,8 @@ def _get_cat_features(
|
|
941
957
|
|
942
958
|
logger.info(f"Selected categorical features: {cat_features}")
|
943
959
|
|
944
|
-
|
945
|
-
features_to_encode = [f for f in cat_features if f
|
960
|
+
features_to_encode = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
|
961
|
+
features_to_encode = [f for f in cat_features if f in features_to_encode]
|
946
962
|
|
947
963
|
logger.info(f"Features to encode: {features_to_encode}")
|
948
964
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev6
|
3
|
+
Version: 1.2.81a3832.dev8
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=M1jXitaZAXPLIGnBLF3YC2-DONuCmeKDqyDxngbrHI0,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=WCX50iuq8_hf9AYuEfs_ZWNR7FbFc44zuXg27Z40r2s,210874
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=nT5eIVjGZp1U1oZUE82zBSniI9gaZDf6QhRlGKJkmQ4,39831
|
10
10
|
upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.81a3832.
|
74
|
-
upgini-1.2.81a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
-
upgini-1.2.81a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
-
upgini-1.2.81a3832.dev6.dist-info/RECORD,,
|
73
|
+
upgini-1.2.81a3832.dev8.dist-info/METADATA,sha256=KxJ6Hfdlki3UGenMTKIc5cc6-VnE9I34zacRVPn9lws,49172
|
74
|
+
upgini-1.2.81a3832.dev8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81a3832.dev8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3832.dev8.dist-info/RECORD,,
|
File without changes
|
File without changes
|