upgini 1.2.68a3832.dev6__py3-none-any.whl → 1.2.68a3832.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/metrics.py +38 -30
- {upgini-1.2.68a3832.dev6.dist-info → upgini-1.2.68a3832.dev8.dist-info}/METADATA +1 -1
- {upgini-1.2.68a3832.dev6.dist-info → upgini-1.2.68a3832.dev8.dist-info}/RECORD +6 -6
- {upgini-1.2.68a3832.dev6.dist-info → upgini-1.2.68a3832.dev8.dist-info}/WHEEL +0 -0
- {upgini-1.2.68a3832.dev6.dist-info → upgini-1.2.68a3832.dev8.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev6"
|
|
1
|
+
__version__ = "1.2.68a3832.dev8"
|
upgini/metrics.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
4
3
|
import inspect
|
|
5
4
|
import logging
|
|
6
5
|
import re
|
|
7
6
|
from collections import defaultdict
|
|
8
7
|
from copy import deepcopy
|
|
8
|
+
from dataclasses import dataclass
|
|
9
9
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
@@ -26,11 +26,8 @@ except ImportError:
|
|
|
26
26
|
from sklearn.metrics._scorer import SCORERS
|
|
27
27
|
|
|
28
28
|
available_scorers = SCORERS
|
|
29
|
-
from sklearn.metrics._regression import (
|
|
30
|
-
_check_reg_targets,
|
|
31
|
-
check_consistent_length,
|
|
32
|
-
)
|
|
33
29
|
from sklearn.metrics import mean_squared_error
|
|
30
|
+
from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
|
|
34
31
|
from sklearn.model_selection import BaseCrossValidator
|
|
35
32
|
|
|
36
33
|
from upgini.errors import ValidationError
|
|
@@ -102,37 +99,43 @@ LIGHTGBM_PARAMS = {
|
|
|
102
99
|
}
|
|
103
100
|
|
|
104
101
|
LIGHTGBM_REGRESSION_PARAMS = {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
102
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
103
|
+
"deterministic": True,
|
|
104
|
+
"min_gain_to_split": 0.001,
|
|
105
|
+
"n_estimators": 275,
|
|
106
|
+
"max_depth": 5,
|
|
107
|
+
"max_cat_threshold": 80,
|
|
108
|
+
"min_data_per_group": 25,
|
|
109
|
+
"cat_l2": 10,
|
|
110
|
+
"cat_smooth": 12,
|
|
111
|
+
"learning_rate": 0.05,
|
|
112
|
+
"feature_fraction": 1.0,
|
|
113
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
114
|
+
"objective": "huber",
|
|
115
|
+
"verbosity": -1,
|
|
117
116
|
}
|
|
118
117
|
|
|
119
118
|
LIGHTGBM_MULTICLASS_PARAMS = {
|
|
120
119
|
"random_state": DEFAULT_RANDOM_STATE,
|
|
120
|
+
"deterministic": True,
|
|
121
|
+
"min_gain_to_split": 0.001,
|
|
121
122
|
"n_estimators": 275,
|
|
122
123
|
"max_depth": 3,
|
|
123
124
|
"max_cat_threshold": 80,
|
|
124
125
|
"min_data_per_group": 25,
|
|
125
126
|
"cat_l2": 10,
|
|
126
127
|
"cat_smooth": 12,
|
|
127
|
-
"learning_rate": 0.25,
|
|
128
|
+
"learning_rate": 0.25, # CatBoost 0.25
|
|
128
129
|
"min_sum_hessian_in_leaf": 0.01,
|
|
129
|
-
"objective": "
|
|
130
|
-
"class_weight": "balanced",
|
|
131
|
-
"verbosity":
|
|
130
|
+
"objective": "softmax",
|
|
131
|
+
"class_weight": "balanced", # TODO pass dict with weights for each class
|
|
132
|
+
"verbosity": -1,
|
|
132
133
|
}
|
|
133
134
|
|
|
134
135
|
LIGHTGBM_BINARY_PARAMS = {
|
|
135
136
|
"random_state": DEFAULT_RANDOM_STATE,
|
|
137
|
+
"deterministic": True,
|
|
138
|
+
"min_gain_to_split": 0.001,
|
|
136
139
|
"n_estimators": 275,
|
|
137
140
|
"max_depth": 5,
|
|
138
141
|
"max_cat_threshold": 80,
|
|
@@ -143,8 +146,8 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
|
143
146
|
"feature_fraction": 1.0,
|
|
144
147
|
"min_sum_hessian_in_leaf": 0.01,
|
|
145
148
|
"objective": "binary",
|
|
146
|
-
"class_weight": "balanced",
|
|
147
|
-
"verbosity":
|
|
149
|
+
"class_weight": "balanced", # TODO pass dict with weights for each class
|
|
150
|
+
"verbosity": -1,
|
|
148
151
|
}
|
|
149
152
|
|
|
150
153
|
N_FOLDS = 5
|
|
@@ -266,6 +269,7 @@ SUPPORTED_CATBOOST_METRICS = {
|
|
|
266
269
|
def is_catboost_estimator(estimator):
|
|
267
270
|
try:
|
|
268
271
|
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
272
|
+
|
|
269
273
|
return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
|
|
270
274
|
except ImportError:
|
|
271
275
|
return False
|
|
@@ -487,8 +491,12 @@ class EstimatorWrapper:
|
|
|
487
491
|
}
|
|
488
492
|
if estimator is None:
|
|
489
493
|
params = {}
|
|
490
|
-
#
|
|
491
|
-
#
|
|
494
|
+
# emb_pattern = r"(.+)_emb\d+"
|
|
495
|
+
# emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
496
|
+
# max_bin_by_feature_type = {
|
|
497
|
+
# feature: 63 if feature in emb_features else 255 for feature in x.columns
|
|
498
|
+
# }
|
|
499
|
+
# params["max_bin_by_feature_type"] = max_bin_by_feature_type
|
|
492
500
|
if target_type == ModelTaskType.MULTICLASS:
|
|
493
501
|
# params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
|
|
494
502
|
# params = _get_add_params(params, add_params)
|
|
@@ -526,9 +534,7 @@ class EstimatorWrapper:
|
|
|
526
534
|
logger.error(
|
|
527
535
|
f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
|
|
528
536
|
)
|
|
529
|
-
estimator_copy.set_params(
|
|
530
|
-
cat_features=cat_features
|
|
531
|
-
)
|
|
537
|
+
estimator_copy.set_params(cat_features=cat_features)
|
|
532
538
|
estimator = CatBoostWrapper(**kwargs)
|
|
533
539
|
else:
|
|
534
540
|
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
@@ -576,8 +582,9 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
576
582
|
x, y, groups, params = super()._prepare_to_fit(x, y)
|
|
577
583
|
|
|
578
584
|
# Find embeddings
|
|
579
|
-
from catboost import CatBoostClassifier
|
|
580
585
|
import catboost
|
|
586
|
+
from catboost import CatBoostClassifier
|
|
587
|
+
|
|
581
588
|
if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
|
|
582
589
|
emb_pattern = r"(.+)_emb\d+"
|
|
583
590
|
self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
@@ -701,6 +708,7 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
701
708
|
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
|
|
702
709
|
try:
|
|
703
710
|
from catboost import Pool
|
|
711
|
+
|
|
704
712
|
# Create Pool for fold data, if need (for example, when categorical features are present)
|
|
705
713
|
fold_pool = Pool(
|
|
706
714
|
x,
|
|
@@ -781,8 +789,8 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
781
789
|
|
|
782
790
|
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
|
|
783
791
|
try:
|
|
784
|
-
import shap
|
|
785
792
|
import lightgbm as lgb
|
|
793
|
+
import shap
|
|
786
794
|
|
|
787
795
|
if not isinstance(estimator, (lgb.LGBMRegressor, lgb.LGBMClassifier)):
|
|
788
796
|
return None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev6
|
|
3
|
+
Version: 1.2.68a3832.dev8
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=KMZpRXK_ksEVGZxYvE4jNHZgG-Ce5Wv3Crjnd_eiTNE,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
|
|
@@ -7,7 +7,7 @@ upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,2
|
|
|
7
7
|
upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
10
|
+
upgini/metrics.py,sha256=1YFj2tmnOYLL4-ZXNZJDYclZADX0w6556DlN6TOlZ44,38686
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
13
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
|
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
73
|
-
upgini-1.2.68a3832.
|
|
74
|
-
upgini-1.2.68a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
75
|
-
upgini-1.2.68a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
76
|
-
upgini-1.2.68a3832.dev6.dist-info/RECORD,,
|
|
73
|
+
upgini-1.2.68a3832.dev8.dist-info/METADATA,sha256=LQi_ixiFjU2qIyoVie4__YDTl_2Tzp6bGlZHFLT5PP4,49149
|
|
74
|
+
upgini-1.2.68a3832.dev8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
75
|
+
upgini-1.2.68a3832.dev8.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
76
|
+
upgini-1.2.68a3832.dev8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|