upgini 1.2.68a3832.dev7__py3-none-any.whl → 1.2.68a3832.dev9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/metrics.py +5 -23
- upgini/utils/feature_info.py +2 -1
- {upgini-1.2.68a3832.dev7.dist-info → upgini-1.2.68a3832.dev9.dist-info}/METADATA +1 -1
- {upgini-1.2.68a3832.dev7.dist-info → upgini-1.2.68a3832.dev9.dist-info}/RECORD +7 -7
- {upgini-1.2.68a3832.dev7.dist-info → upgini-1.2.68a3832.dev9.dist-info}/WHEEL +0 -0
- {upgini-1.2.68a3832.dev7.dist-info → upgini-1.2.68a3832.dev9.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev7"
|
|
1
|
+
__version__ = "1.2.68a3832.dev9"
|
upgini/metrics.py
CHANGED
|
@@ -491,26 +491,15 @@ class EstimatorWrapper:
|
|
|
491
491
|
}
|
|
492
492
|
if estimator is None:
|
|
493
493
|
params = {}
|
|
494
|
-
# if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
|
|
495
|
-
# params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
|
|
496
494
|
if target_type == ModelTaskType.MULTICLASS:
|
|
497
|
-
# params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
|
498
|
-
# params = _get_add_params(params, add_params)
|
|
499
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
500
495
|
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
|
501
496
|
params = _get_add_params(params, add_params)
|
|
502
497
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
503
498
|
elif target_type == ModelTaskType.BINARY:
|
|
504
|
-
# params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
|
|
505
|
-
# params = _get_add_params(params, add_params)
|
|
506
|
-
# estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
|
|
507
499
|
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
|
508
500
|
params = _get_add_params(params, add_params)
|
|
509
501
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
510
502
|
elif target_type == ModelTaskType.REGRESSION:
|
|
511
|
-
# params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
|
|
512
|
-
# params = _get_add_params(params, add_params)
|
|
513
|
-
# estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
|
|
514
503
|
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
|
515
504
|
params = _get_add_params(params, add_params)
|
|
516
505
|
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
|
@@ -523,18 +512,19 @@ class EstimatorWrapper:
|
|
|
523
512
|
estimator_copy = deepcopy(estimator)
|
|
524
513
|
kwargs["estimator"] = estimator_copy
|
|
525
514
|
if is_catboost_estimator(estimator):
|
|
526
|
-
params["has_time"] = has_date
|
|
527
515
|
if cat_features is not None:
|
|
528
516
|
for cat_feature in cat_features:
|
|
529
517
|
if cat_feature not in x.columns:
|
|
530
518
|
logger.error(
|
|
531
519
|
f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
|
|
532
520
|
)
|
|
533
|
-
estimator_copy.set_params(cat_features=cat_features)
|
|
521
|
+
estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
|
|
534
522
|
estimator = CatBoostWrapper(**kwargs)
|
|
535
523
|
else:
|
|
536
524
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
537
525
|
estimator = LightGBMWrapper(**kwargs)
|
|
526
|
+
elif is_catboost_estimator(estimator):
|
|
527
|
+
estimator = CatBoostWrapper(**kwargs)
|
|
538
528
|
else:
|
|
539
529
|
logger.warning(
|
|
540
530
|
f"Unexpected estimator is used for metrics: {estimator}. "
|
|
@@ -761,20 +751,12 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
761
751
|
self.cat_features = None
|
|
762
752
|
|
|
763
753
|
def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
|
|
764
|
-
x,
|
|
765
|
-
if self.target_type == ModelTaskType.MULTICLASS:
|
|
766
|
-
params["num_class"] = y.nunique()
|
|
767
|
-
emb_pattern = r"(.+)_emb\d+"
|
|
768
|
-
emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
769
|
-
max_bin_by_feature_type = {
|
|
770
|
-
feature: 63 if feature in emb_features else 255 for feature in x.columns
|
|
771
|
-
}
|
|
772
|
-
params["max_bin_by_feature_type"] = max_bin_by_feature_type
|
|
754
|
+
x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
|
|
773
755
|
self.cat_features = _get_cat_features(x)
|
|
774
756
|
x = fill_na_cat_features(x, self.cat_features)
|
|
775
757
|
for feature in self.cat_features:
|
|
776
758
|
x[feature] = x[feature].astype("category").cat.codes
|
|
777
|
-
if not is_numeric_dtype(
|
|
759
|
+
if not is_numeric_dtype(y_numpy):
|
|
778
760
|
y = correct_string_target(y)
|
|
779
761
|
|
|
780
762
|
return x, y, groups, params
|
upgini/utils/feature_info.py
CHANGED
|
@@ -90,7 +90,8 @@ class FeatureInfo:
|
|
|
90
90
|
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
91
|
if data is not None and len(data) > 0 and feature_meta.name in data.columns:
|
|
92
92
|
if len(data) > 3:
|
|
93
|
-
|
|
93
|
+
rand = np.random.RandomState(42)
|
|
94
|
+
feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
94
95
|
else:
|
|
95
96
|
feature_sample = data[feature_meta.name].dropna().unique().tolist()
|
|
96
97
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev7
|
|
3
|
+
Version: 1.2.68a3832.dev9
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=PUD99JYjzGuY1b4bkoVfexbGcQ412xzz1bitG33oTYM,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
|
|
@@ -7,7 +7,7 @@ upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,2
|
|
|
7
7
|
upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
10
|
+
upgini/metrics.py,sha256=5g2N-z7IFH52tkgeOQjOq2-lsn-s1S4PtEWlwoS14jg,37629
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
13
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
|
@@ -56,7 +56,7 @@ upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuM
|
|
|
56
56
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
57
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
58
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
59
|
-
upgini/utils/feature_info.py,sha256=
|
|
59
|
+
upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
|
|
60
60
|
upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
|
|
61
61
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
62
62
|
upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
|
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
|
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
73
|
-
upgini-1.2.68a3832.
|
|
74
|
-
upgini-1.2.68a3832.
|
|
75
|
-
upgini-1.2.68a3832.
|
|
76
|
-
upgini-1.2.68a3832.
|
|
73
|
+
upgini-1.2.68a3832.dev9.dist-info/METADATA,sha256=zgbDmansWs-GTVIg9WJrS86ZRfIZ4Psx-2Ao7JVFIQs,49149
|
|
74
|
+
upgini-1.2.68a3832.dev9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
75
|
+
upgini-1.2.68a3832.dev9.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
76
|
+
upgini-1.2.68a3832.dev9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|