upgini 1.2.81a3832.dev6__py3-none-any.whl → 1.2.81a3832.dev8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.81a3832.dev6"
1
+ __version__ = "1.2.81a3832.dev8"
upgini/features_enricher.py CHANGED
@@ -3934,6 +3934,7 @@ if response.status_code == 200:
3934
3934
  continue
3935
3935
 
3936
3936
  # Use only important features
3937
+ # If select_features is False, we don't show etalon features in the report
3937
3938
  if (
3938
3939
  # feature_meta.name in self.fit_generated_features or
3939
3940
  feature_meta.name == COUNTRY # constant synthetic column
upgini/metrics.py CHANGED
@@ -18,6 +18,7 @@ from numpy import log1p
18
18
  from pandas.api.types import is_numeric_dtype
19
19
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
20
20
 
21
+ from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
21
22
  from upgini.utils.features_validator import FeaturesValidator
22
23
  from upgini.utils.sklearn_ext import cross_validate
23
24
 
@@ -31,7 +32,7 @@ except ImportError:
31
32
  available_scorers = SCORERS
32
33
  from sklearn.metrics import mean_squared_error
33
34
  from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
34
- from sklearn.model_selection import BaseCrossValidator # , TimeSeriesSplit
35
+ from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit # , TimeSeriesSplit
35
36
 
36
37
  from upgini.errors import ValidationError
37
38
  from upgini.metadata import ModelTaskType
@@ -250,6 +251,8 @@ class _CrossValResults:
250
251
 
251
252
 
252
253
  class EstimatorWrapper:
254
+ default_estimator = "catboost"
255
+
253
256
  def __init__(
254
257
  self,
255
258
  estimator,
@@ -352,6 +355,7 @@ class EstimatorWrapper:
352
355
  self.logger.info("Calculate baseline GINI on passed baseline_score_column and target")
353
356
  metric = roc_auc_score(y, x[baseline_score_column])
354
357
  else:
358
+ self.logger.info(f"Cross validate with estimeator: {self.estimator}")
355
359
  cv_results = cross_validate(
356
360
  estimator=self.estimator,
357
361
  x=x,
@@ -458,31 +462,43 @@ class EstimatorWrapper:
458
462
  "logger": logger,
459
463
  }
460
464
  if estimator is None:
461
- params = {"has_time": has_date}
462
- if target_type == ModelTaskType.MULTICLASS:
463
- params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
464
- params = _get_add_params(params, add_params)
465
- estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
466
- # params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
467
- # params = _get_add_params(params, add_params)
468
- # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
469
- elif target_type == ModelTaskType.BINARY:
470
- params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
471
- params = _get_add_params(params, add_params)
472
- estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
473
- # params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
474
- # params = _get_add_params(params, add_params)
475
- # estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
476
- elif target_type == ModelTaskType.REGRESSION:
477
- params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
478
- params = _get_add_params(params, add_params)
479
- estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
480
- # if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
481
- # params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
482
- # params = _get_add_params(params, add_params)
483
- # estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
465
+ if EstimatorWrapper.default_estimator == "catboost":
466
+ logger.info("Using CatBoost as default estimator")
467
+ params = {"has_time": has_date}
468
+ if target_type == ModelTaskType.MULTICLASS:
469
+ params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
470
+ params = _get_add_params(params, add_params)
471
+ estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
472
+ elif target_type == ModelTaskType.BINARY:
473
+ params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
474
+ params = _get_add_params(params, add_params)
475
+ estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
476
+ elif target_type == ModelTaskType.REGRESSION:
477
+ params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
478
+ params = _get_add_params(params, add_params)
479
+ estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
480
+ else:
481
+ raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
482
+ elif EstimatorWrapper.default_estimator == "lightgbm":
483
+ logger.info("Using LightGBM as default estimator")
484
+ params = {"random_state": DEFAULT_RANDOM_STATE, "verbose": -1}
485
+ if target_type == ModelTaskType.MULTICLASS:
486
+ params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
487
+ params = _get_add_params(params, add_params)
488
+ estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
489
+ elif target_type == ModelTaskType.BINARY:
490
+ params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
491
+ params = _get_add_params(params, add_params)
492
+ estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
493
+ elif target_type == ModelTaskType.REGRESSION:
494
+ if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
495
+ params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
496
+ params = _get_add_params(params, add_params)
497
+ estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
498
+ else:
499
+ raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
484
500
  else:
485
- raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
501
+ raise Exception("Unsupported default_estimator. Available: catboost, lightgbm")
486
502
  else:
487
503
  if hasattr(estimator, "copy"):
488
504
  estimator_copy = estimator.copy()
@@ -490,8 +506,8 @@ class EstimatorWrapper:
490
506
  estimator_copy = deepcopy(estimator)
491
507
  kwargs["estimator"] = estimator_copy
492
508
  if is_catboost_estimator(estimator):
493
- if cat_features is not None:
494
- estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
509
+ if has_date is not None:
510
+ estimator_copy.set_params(has_time=has_date)
495
511
  estimator = CatBoostWrapper(**kwargs)
496
512
  else:
497
513
  if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
@@ -941,8 +957,8 @@ def _get_cat_features(
941
957
 
942
958
  logger.info(f"Selected categorical features: {cat_features}")
943
959
 
944
- non_encode_features = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
945
- features_to_encode = [f for f in cat_features if f not in non_encode_features]
960
+ features_to_encode = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
961
+ features_to_encode = [f for f in cat_features if f in features_to_encode]
946
962
 
947
963
  logger.info(f"Features to encode: {features_to_encode}")
948
964
 
upgini-1.2.81a3832.dev8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.81a3832.dev6
3
+ Version: 1.2.81a3832.dev8
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.81a3832.dev8.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=yNrgPKOedmyNgT4TYavHML3irFQc9hNEAf0TxhtzLzA,33
1
+ upgini/__about__.py,sha256=M1jXitaZAXPLIGnBLF3YC2-DONuCmeKDqyDxngbrHI0,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=ODCSzFw62y_8vUrfbcZtDu0dWMIDCGYKWD2F54QDFII,210787
6
+ upgini/features_enricher.py,sha256=WCX50iuq8_hf9AYuEfs_ZWNR7FbFc44zuXg27Z40r2s,210874
7
7
  upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=lWFF_dQAWcgI7EOQlTXiLjsAEoPLxNv1PCp_egoKolc,38821
9
+ upgini/metrics.py,sha256=nT5eIVjGZp1U1oZUE82zBSniI9gaZDf6QhRlGKJkmQ4,39831
10
10
  upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.81a3832.dev6.dist-info/METADATA,sha256=WjpXtnU3FUqspcRA2Zl-5iMqo5fqT2xIhHPJXFPcPN4,49172
74
- upgini-1.2.81a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.81a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.81a3832.dev6.dist-info/RECORD,,
73
+ upgini-1.2.81a3832.dev8.dist-info/METADATA,sha256=KxJ6Hfdlki3UGenMTKIc5cc6-VnE9I34zacRVPn9lws,49172
74
+ upgini-1.2.81a3832.dev8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.81a3832.dev8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.81a3832.dev8.dist-info/RECORD,,