upgini 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +6 -5
- upgini/metrics.py +42 -42
- {upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/METADATA +1 -1
- {upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/RECORD +7 -7
- {upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/WHEEL +0 -0
- {upgini-1.2.79.dist-info → upgini-1.2.80.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.79"
|
1
|
+
__version__ = "1.2.80"
|
upgini/features_enricher.py
CHANGED
@@ -1119,7 +1119,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1119
1119
|
self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
|
1120
1120
|
}
|
1121
1121
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
1122
|
-
|
1122
|
+
y_sorted
|
1123
1123
|
):
|
1124
1124
|
train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
1125
1125
|
# np.mean(validated_y), 4
|
@@ -1197,7 +1197,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1197
1197
|
# self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
|
1198
1198
|
}
|
1199
1199
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
1200
|
-
|
1200
|
+
eval_y_sorted
|
1201
1201
|
):
|
1202
1202
|
eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
1203
1203
|
# np.mean(validated_eval_set[idx][1]), 4
|
@@ -3886,9 +3886,10 @@ if response.status_code == 200:
|
|
3886
3886
|
if updated_shaps is not None:
|
3887
3887
|
updating_shap = updated_shaps.get(feature_meta.name)
|
3888
3888
|
if updating_shap is None:
|
3889
|
-
|
3890
|
-
|
3891
|
-
|
3889
|
+
if feature_meta.shap_value != 0.0:
|
3890
|
+
self.logger.warning(
|
3891
|
+
f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
|
3892
|
+
)
|
3892
3893
|
updating_shap = 0.0
|
3893
3894
|
feature_meta.shap_value = updating_shap
|
3894
3895
|
|
upgini/metrics.py
CHANGED
@@ -99,7 +99,7 @@ LIGHTGBM_REGRESSION_PARAMS = {
|
|
99
99
|
"min_sum_hessian_in_leaf": 0.01,
|
100
100
|
"objective": "huber",
|
101
101
|
"deterministic": "true",
|
102
|
-
"force_col_wise": "true",
|
102
|
+
# "force_col_wise": "true",
|
103
103
|
"verbosity": -1,
|
104
104
|
}
|
105
105
|
|
@@ -119,7 +119,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
119
119
|
"num_grad_quant_bins": "8",
|
120
120
|
"stochastic_rounding": "true",
|
121
121
|
"deterministic": "true",
|
122
|
-
"force_col_wise": "true",
|
122
|
+
# "force_col_wise": "true",
|
123
123
|
"verbosity": -1,
|
124
124
|
}
|
125
125
|
|
@@ -136,7 +136,7 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
136
136
|
"cat_smooth": 18,
|
137
137
|
"cat_l2": 8,
|
138
138
|
"deterministic": "true",
|
139
|
-
"force_col_wise": "true",
|
139
|
+
# "force_col_wise": "true",
|
140
140
|
"verbosity": -1,
|
141
141
|
}
|
142
142
|
|
@@ -145,33 +145,33 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
|
|
145
145
|
N_FOLDS = 5
|
146
146
|
BLOCKED_TS_TEST_SIZE = 0.2
|
147
147
|
|
148
|
-
NA_VALUES = [
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
]
|
173
|
-
|
174
|
-
NA_REPLACEMENT = "NA"
|
148
|
+
# NA_VALUES = [
|
149
|
+
# "",
|
150
|
+
# " ",
|
151
|
+
# " ",
|
152
|
+
# "#n/a",
|
153
|
+
# "#n/a n/a",
|
154
|
+
# "#na",
|
155
|
+
# "-1.#ind",
|
156
|
+
# "-1.#qnan",
|
157
|
+
# "-nan",
|
158
|
+
# "1.#ind",
|
159
|
+
# "1.#qnan",
|
160
|
+
# "n/a",
|
161
|
+
# "na",
|
162
|
+
# "null",
|
163
|
+
# "nan",
|
164
|
+
# "n/a",
|
165
|
+
# "nan",
|
166
|
+
# "none",
|
167
|
+
# "-",
|
168
|
+
# "undefined",
|
169
|
+
# "[[unknown]]",
|
170
|
+
# "[not provided]",
|
171
|
+
# "[unknown]",
|
172
|
+
# ]
|
173
|
+
|
174
|
+
# NA_REPLACEMENT = "NA"
|
175
175
|
|
176
176
|
SUPPORTED_CATBOOST_METRICS = {
|
177
177
|
s.upper(): s
|
@@ -758,8 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
758
758
|
params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
|
759
759
|
self.cat_features = _get_cat_features(x)
|
760
760
|
if self.cat_features:
|
761
|
-
x = fill_na_cat_features(x, self.cat_features)
|
762
|
-
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value
|
761
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
762
|
+
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
|
763
763
|
encoded = pd.DataFrame(
|
764
764
|
encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
765
765
|
)
|
@@ -773,7 +773,7 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
773
773
|
def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
|
774
774
|
x, y_numpy, params = super()._prepare_to_calculate(x, y)
|
775
775
|
if self.cat_features is not None:
|
776
|
-
x = fill_na_cat_features(x, self.cat_features)
|
776
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
777
777
|
if self.cat_encoder is not None:
|
778
778
|
x[self.cat_features] = pd.DataFrame(
|
779
779
|
self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
@@ -857,7 +857,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
|
|
857
857
|
self.cat_features = _get_cat_features(x)
|
858
858
|
num_features = [col for col in x.columns if col not in self.cat_features]
|
859
859
|
x[num_features] = x[num_features].fillna(-999)
|
860
|
-
x = fill_na_cat_features(x, self.cat_features)
|
860
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
861
861
|
# TODO use one-hot encoding if cardinality is less 50
|
862
862
|
for feature in self.cat_features:
|
863
863
|
x[feature] = x[feature].astype("category").cat.codes
|
@@ -870,7 +870,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
|
|
870
870
|
if self.cat_features is not None:
|
871
871
|
num_features = [col for col in x.columns if col not in self.cat_features]
|
872
872
|
x[num_features] = x[num_features].fillna(-999)
|
873
|
-
x = fill_na_cat_features(x, self.cat_features)
|
873
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
874
874
|
# TODO use one-hot encoding if cardinality is less 50
|
875
875
|
for feature in self.cat_features:
|
876
876
|
x[feature] = x[feature].astype("category").cat.codes
|
@@ -1056,10 +1056,10 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
|
|
1056
1056
|
return mse if squared else np.sqrt(mse)
|
1057
1057
|
|
1058
1058
|
|
1059
|
-
def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1059
|
+
# def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1060
|
+
# for c in cat_features:
|
1061
|
+
# if c in df.columns:
|
1062
|
+
# df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
|
1063
|
+
# na_filter = df[c].str.lower().isin(NA_VALUES)
|
1064
|
+
# df.loc[na_filter, c] = NA_REPLACEMENT
|
1065
|
+
# return df
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
|
7
7
|
upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
|
10
10
|
upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.79.dist-info/RECORD,,
|
73
|
+
upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
|
74
|
+
upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.80.dist-info/RECORD,,
|
File without changes
|
File without changes
|