upgini 1.2.68a3832.dev8__tar.gz → 1.2.68a3832.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/PKG-INFO +1 -1
- upgini-1.2.68a3832.dev9/src/upgini/__about__.py +1 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/metrics.py +5 -21
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/feature_info.py +2 -1
- upgini-1.2.68a3832.dev8/src/upgini/__about__.py +0 -1
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/.gitignore +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/LICENSE +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/README.md +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/pyproject.toml +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/ads.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/dataset.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/errors.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/http.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/metadata.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/search_task.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/spinner.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev8
|
|
3
|
+
Version: 1.2.68a3832.dev9
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.68a3832.dev9"
|
|
@@ -491,30 +491,15 @@ class EstimatorWrapper:
|
|
|
491
491
|
}
|
|
492
492
|
if estimator is None:
|
|
493
493
|
params = {}
|
|
494
|
-
# emb_pattern = r"(.+)_emb\d+"
|
|
495
|
-
# emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
496
|
-
# max_bin_by_feature_type = {
|
|
497
|
-
# feature: 63 if feature in emb_features else 255 for feature in x.columns
|
|
498
|
-
# }
|
|
499
|
-
# params["max_bin_by_feature_type"] = max_bin_by_feature_type
|
|
500
494
|
if target_type == ModelTaskType.MULTICLASS:
|
|
501
|
-
# params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
|
502
|
-
# params = _get_add_params(params, add_params)
|
|
503
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
504
495
|
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
|
505
496
|
params = _get_add_params(params, add_params)
|
|
506
497
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
507
498
|
elif target_type == ModelTaskType.BINARY:
|
|
508
|
-
# params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
|
|
509
|
-
# params = _get_add_params(params, add_params)
|
|
510
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
511
499
|
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
|
512
500
|
params = _get_add_params(params, add_params)
|
|
513
501
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
514
502
|
elif target_type == ModelTaskType.REGRESSION:
|
|
515
|
-
# params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
|
|
516
|
-
# params = _get_add_params(params, add_params)
|
|
517
|
-
# estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
|
|
518
503
|
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
|
519
504
|
params = _get_add_params(params, add_params)
|
|
520
505
|
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
|
@@ -527,18 +512,19 @@ class EstimatorWrapper:
|
|
|
527
512
|
estimator_copy = deepcopy(estimator)
|
|
528
513
|
kwargs["estimator"] = estimator_copy
|
|
529
514
|
if is_catboost_estimator(estimator):
|
|
530
|
-
params["has_time"] = has_date
|
|
531
515
|
if cat_features is not None:
|
|
532
516
|
for cat_feature in cat_features:
|
|
533
517
|
if cat_feature not in x.columns:
|
|
534
518
|
logger.error(
|
|
535
519
|
f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
|
|
536
520
|
)
|
|
537
|
-
estimator_copy.set_params(cat_features=cat_features)
|
|
521
|
+
estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
|
|
538
522
|
estimator = CatBoostWrapper(**kwargs)
|
|
539
523
|
else:
|
|
540
524
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
541
525
|
estimator = LightGBMWrapper(**kwargs)
|
|
526
|
+
elif is_catboost_estimator(estimator):
|
|
527
|
+
estimator = CatBoostWrapper(**kwargs)
|
|
542
528
|
else:
|
|
543
529
|
logger.warning(
|
|
544
530
|
f"Unexpected estimator is used for metrics: {estimator}. "
|
|
@@ -765,14 +751,12 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
765
751
|
self.cat_features = None
|
|
766
752
|
|
|
767
753
|
def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
|
|
768
|
-
x,
|
|
769
|
-
if self.target_type == ModelTaskType.MULTICLASS:
|
|
770
|
-
params["num_class"] = y.nunique()
|
|
754
|
+
x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
|
|
771
755
|
self.cat_features = _get_cat_features(x)
|
|
772
756
|
x = fill_na_cat_features(x, self.cat_features)
|
|
773
757
|
for feature in self.cat_features:
|
|
774
758
|
x[feature] = x[feature].astype("category").cat.codes
|
|
775
|
-
if not is_numeric_dtype(
|
|
759
|
+
if not is_numeric_dtype(y_numpy):
|
|
776
760
|
y = correct_string_target(y)
|
|
777
761
|
|
|
778
762
|
return x, y, groups, params
|
|
@@ -90,7 +90,8 @@ class FeatureInfo:
|
|
|
90
90
|
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
91
|
if data is not None and len(data) > 0 and feature_meta.name in data.columns:
|
|
92
92
|
if len(data) > 3:
|
|
93
|
-
|
|
93
|
+
rand = np.random.RandomState(42)
|
|
94
|
+
feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
94
95
|
else:
|
|
95
96
|
feature_sample = data[feature_meta.name].dropna().unique().tolist()
|
|
96
97
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/autofe/timeseries/volatility.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/normalizer/normalize_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev9}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|