upgini 1.2.68a3832.dev4__tar.gz → 1.2.68a3832.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/PKG-INFO +1 -1
- upgini-1.2.68a3832.dev6/src/upgini/__about__.py +1 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/dataset.py +1 -1
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/metrics.py +52 -4
- upgini-1.2.68a3832.dev4/src/upgini/__about__.py +0 -1
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/.gitignore +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/LICENSE +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/README.md +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/pyproject.toml +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/ads.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/delta.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/trend.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/volatility.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/errors.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/features_enricher.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/http.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/metadata.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/search_task.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/spinner.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev4
|
|
3
|
+
Version: 1.2.68a3832.dev6
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.68a3832.dev6"
|
|
@@ -388,7 +388,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
388
388
|
for col in columns_to_validate:
|
|
389
389
|
self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
|
|
390
390
|
if validate_target and target is not None and col == target:
|
|
391
|
-
self.data.loc[self.data[target] == np.Inf, f"{col}_is_valid"] = False
|
|
391
|
+
self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
|
|
392
392
|
|
|
393
393
|
if col in mandatory_columns:
|
|
394
394
|
self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]
|
|
@@ -101,6 +101,52 @@ LIGHTGBM_PARAMS = {
|
|
|
101
101
|
"min_sum_hessian_in_leaf": 0.01,
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
+
LIGHTGBM_REGRESSION_PARAMS = {
|
|
105
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
106
|
+
"n_estimators": 275,
|
|
107
|
+
"max_depth": 5,
|
|
108
|
+
"max_cat_threshold": 80,
|
|
109
|
+
"min_data_per_group": 25,
|
|
110
|
+
"cat_l2": 10,
|
|
111
|
+
"cat_smooth": 12,
|
|
112
|
+
"learning_rate": 0.05,
|
|
113
|
+
"feature_fraction": 1.0,
|
|
114
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
115
|
+
"objective": "huber",
|
|
116
|
+
"verbosity": 0,
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
LIGHTGBM_MULTICLASS_PARAMS = {
|
|
120
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
121
|
+
"n_estimators": 275,
|
|
122
|
+
"max_depth": 3,
|
|
123
|
+
"max_cat_threshold": 80,
|
|
124
|
+
"min_data_per_group": 25,
|
|
125
|
+
"cat_l2": 10,
|
|
126
|
+
"cat_smooth": 12,
|
|
127
|
+
"learning_rate": 0.25, # CatBoost 0.25
|
|
128
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
129
|
+
"objective": "multiclass",
|
|
130
|
+
"class_weight": "balanced",
|
|
131
|
+
"verbosity": 0,
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
LIGHTGBM_BINARY_PARAMS = {
|
|
135
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
136
|
+
"n_estimators": 275,
|
|
137
|
+
"max_depth": 5,
|
|
138
|
+
"max_cat_threshold": 80,
|
|
139
|
+
"min_data_per_group": 25,
|
|
140
|
+
"cat_l2": 10,
|
|
141
|
+
"cat_smooth": 12,
|
|
142
|
+
"learning_rate": 0.05,
|
|
143
|
+
"feature_fraction": 1.0,
|
|
144
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
145
|
+
"objective": "binary",
|
|
146
|
+
"class_weight": "balanced",
|
|
147
|
+
"verbosity": 0,
|
|
148
|
+
}
|
|
149
|
+
|
|
104
150
|
N_FOLDS = 5
|
|
105
151
|
BLOCKED_TS_TEST_SIZE = 0.2
|
|
106
152
|
|
|
@@ -441,28 +487,27 @@ class EstimatorWrapper:
|
|
|
441
487
|
}
|
|
442
488
|
if estimator is None:
|
|
443
489
|
params = {}
|
|
444
|
-
params["has_time"] = has_date
|
|
445
490
|
# if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
|
|
446
491
|
# params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
|
|
447
492
|
if target_type == ModelTaskType.MULTICLASS:
|
|
448
493
|
# params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
|
449
494
|
# params = _get_add_params(params, add_params)
|
|
450
495
|
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
451
|
-
params = _get_add_params(params, LIGHTGBM_PARAMS)
|
|
496
|
+
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
|
452
497
|
params = _get_add_params(params, add_params)
|
|
453
498
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
454
499
|
elif target_type == ModelTaskType.BINARY:
|
|
455
500
|
# params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
|
|
456
501
|
# params = _get_add_params(params, add_params)
|
|
457
502
|
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
458
|
-
params = _get_add_params(params, LIGHTGBM_PARAMS)
|
|
503
|
+
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
|
459
504
|
params = _get_add_params(params, add_params)
|
|
460
505
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
461
506
|
elif target_type == ModelTaskType.REGRESSION:
|
|
462
507
|
# params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
|
|
463
508
|
# params = _get_add_params(params, add_params)
|
|
464
509
|
# estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
|
|
465
|
-
params = _get_add_params(params, LIGHTGBM_PARAMS)
|
|
510
|
+
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
|
466
511
|
params = _get_add_params(params, add_params)
|
|
467
512
|
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
|
468
513
|
else:
|
|
@@ -474,6 +519,7 @@ class EstimatorWrapper:
|
|
|
474
519
|
estimator_copy = deepcopy(estimator)
|
|
475
520
|
kwargs["estimator"] = estimator_copy
|
|
476
521
|
if is_catboost_estimator(estimator):
|
|
522
|
+
params["has_time"] = has_date
|
|
477
523
|
if cat_features is not None:
|
|
478
524
|
for cat_feature in cat_features:
|
|
479
525
|
if cat_feature not in x.columns:
|
|
@@ -712,6 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
712
758
|
|
|
713
759
|
def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
|
|
714
760
|
x, y, groups, params = super()._prepare_to_fit(x, y)
|
|
761
|
+
if self.target_type == ModelTaskType.MULTICLASS:
|
|
762
|
+
params["num_class"] = y.nunique()
|
|
715
763
|
self.cat_features = _get_cat_features(x)
|
|
716
764
|
x = fill_na_cat_features(x, self.cat_features)
|
|
717
765
|
for feature in self.cat_features:
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev4"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/ads_management/ads_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/autofe/timeseries/volatility.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/normalizer/normalize_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/exceptions.py
RENAMED
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/resource_bundle/strings.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/sampler/random_under_sampler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/base_search_key_detector.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.68a3832.dev4 → upgini-1.2.68a3832.dev6}/src/upgini/utils/fallback_progress_bar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|