upgini 1.2.68a3832.dev8__tar.gz → 1.2.68a3832.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/PKG-INFO +1 -1
- upgini-1.2.68a3832.dev10/src/upgini/__about__.py +1 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/metrics.py +9 -22
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/feature_info.py +2 -1
- upgini-1.2.68a3832.dev8/src/upgini/__about__.py +0 -1
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/.gitignore +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/LICENSE +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/README.md +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/pyproject.toml +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/dataset.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/errors.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/http.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/metadata.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/search_task.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/spinner.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev8
|
|
3
|
+
Version: 1.2.68a3832.dev10
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.68a3832.dev10"
|
|
@@ -127,8 +127,11 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
|
127
127
|
"cat_smooth": 12,
|
|
128
128
|
"learning_rate": 0.25, # CatBoost 0.25
|
|
129
129
|
"min_sum_hessian_in_leaf": 0.01,
|
|
130
|
-
"objective": "softmax",
|
|
131
130
|
"class_weight": "balanced", # TODO pass dict with weights for each class
|
|
131
|
+
"objective": "multiclass",
|
|
132
|
+
"use_quantized_grad": "true",
|
|
133
|
+
"num_grad_quant_bins": "8",
|
|
134
|
+
"stochastic_rounding": "true",
|
|
132
135
|
"verbosity": -1,
|
|
133
136
|
}
|
|
134
137
|
|
|
@@ -491,30 +494,15 @@ class EstimatorWrapper:
|
|
|
491
494
|
}
|
|
492
495
|
if estimator is None:
|
|
493
496
|
params = {}
|
|
494
|
-
# emb_pattern = r"(.+)_emb\d+"
|
|
495
|
-
# emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
496
|
-
# max_bin_by_feature_type = {
|
|
497
|
-
# feature: 63 if feature in emb_features else 255 for feature in x.columns
|
|
498
|
-
# }
|
|
499
|
-
# params["max_bin_by_feature_type"] = max_bin_by_feature_type
|
|
500
497
|
if target_type == ModelTaskType.MULTICLASS:
|
|
501
|
-
# params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
|
502
|
-
# params = _get_add_params(params, add_params)
|
|
503
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
504
498
|
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
|
505
499
|
params = _get_add_params(params, add_params)
|
|
506
500
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
507
501
|
elif target_type == ModelTaskType.BINARY:
|
|
508
|
-
# params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
|
|
509
|
-
# params = _get_add_params(params, add_params)
|
|
510
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
511
502
|
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
|
512
503
|
params = _get_add_params(params, add_params)
|
|
513
504
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
514
505
|
elif target_type == ModelTaskType.REGRESSION:
|
|
515
|
-
# params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
|
|
516
|
-
# params = _get_add_params(params, add_params)
|
|
517
|
-
# estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
|
|
518
506
|
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
|
519
507
|
params = _get_add_params(params, add_params)
|
|
520
508
|
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
|
@@ -527,18 +515,19 @@ class EstimatorWrapper:
|
|
|
527
515
|
estimator_copy = deepcopy(estimator)
|
|
528
516
|
kwargs["estimator"] = estimator_copy
|
|
529
517
|
if is_catboost_estimator(estimator):
|
|
530
|
-
params["has_time"] = has_date
|
|
531
518
|
if cat_features is not None:
|
|
532
519
|
for cat_feature in cat_features:
|
|
533
520
|
if cat_feature not in x.columns:
|
|
534
521
|
logger.error(
|
|
535
522
|
f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
|
|
536
523
|
)
|
|
537
|
-
estimator_copy.set_params(cat_features=cat_features)
|
|
524
|
+
estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
|
|
538
525
|
estimator = CatBoostWrapper(**kwargs)
|
|
539
526
|
else:
|
|
540
527
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
541
528
|
estimator = LightGBMWrapper(**kwargs)
|
|
529
|
+
elif is_catboost_estimator(estimator):
|
|
530
|
+
estimator = CatBoostWrapper(**kwargs)
|
|
542
531
|
else:
|
|
543
532
|
logger.warning(
|
|
544
533
|
f"Unexpected estimator is used for metrics: {estimator}. "
|
|
@@ -765,14 +754,12 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
765
754
|
self.cat_features = None
|
|
766
755
|
|
|
767
756
|
def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
|
|
768
|
-
x,
|
|
769
|
-
if self.target_type == ModelTaskType.MULTICLASS:
|
|
770
|
-
params["num_class"] = y.nunique()
|
|
757
|
+
x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
|
|
771
758
|
self.cat_features = _get_cat_features(x)
|
|
772
759
|
x = fill_na_cat_features(x, self.cat_features)
|
|
773
760
|
for feature in self.cat_features:
|
|
774
761
|
x[feature] = x[feature].astype("category").cat.codes
|
|
775
|
-
if not is_numeric_dtype(
|
|
762
|
+
if not is_numeric_dtype(y_numpy):
|
|
776
763
|
y = correct_string_target(y)
|
|
777
764
|
|
|
778
765
|
return x, y, groups, params
|
|
@@ -90,7 +90,8 @@ class FeatureInfo:
|
|
|
90
90
|
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
91
|
if data is not None and len(data) > 0 and feature_meta.name in data.columns:
|
|
92
92
|
if len(data) > 3:
|
|
93
|
-
|
|
93
|
+
rand = np.random.RandomState(42)
|
|
94
|
+
feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
94
95
|
else:
|
|
95
96
|
feature_sample = data[feature_meta.name].dropna().unique().tolist()
|
|
96
97
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/volatility.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/normalizer/normalize_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/blocked_time_series.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|