upgini 1.2.79a1__py3-none-any.whl → 1.2.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +6 -5
- upgini/metrics.py +42 -45
- {upgini-1.2.79a1.dist-info → upgini-1.2.80.dist-info}/METADATA +1 -1
- {upgini-1.2.79a1.dist-info → upgini-1.2.80.dist-info}/RECORD +7 -7
- {upgini-1.2.79a1.dist-info → upgini-1.2.80.dist-info}/WHEEL +0 -0
- {upgini-1.2.79a1.dist-info → upgini-1.2.80.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.79a1"
|
1
|
+
__version__ = "1.2.80"
|
upgini/features_enricher.py
CHANGED
@@ -1119,7 +1119,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1119
1119
|
self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
|
1120
1120
|
}
|
1121
1121
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
1122
|
-
|
1122
|
+
y_sorted
|
1123
1123
|
):
|
1124
1124
|
train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
1125
1125
|
# np.mean(validated_y), 4
|
@@ -1197,7 +1197,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1197
1197
|
# self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
|
1198
1198
|
}
|
1199
1199
|
if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
|
1200
|
-
|
1200
|
+
eval_y_sorted
|
1201
1201
|
):
|
1202
1202
|
eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
|
1203
1203
|
# np.mean(validated_eval_set[idx][1]), 4
|
@@ -3886,9 +3886,10 @@ if response.status_code == 200:
|
|
3886
3886
|
if updated_shaps is not None:
|
3887
3887
|
updating_shap = updated_shaps.get(feature_meta.name)
|
3888
3888
|
if updating_shap is None:
|
3889
|
-
|
3890
|
-
|
3891
|
-
|
3889
|
+
if feature_meta.shap_value != 0.0:
|
3890
|
+
self.logger.warning(
|
3891
|
+
f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
|
3892
|
+
)
|
3892
3893
|
updating_shap = 0.0
|
3893
3894
|
feature_meta.shap_value = updating_shap
|
3894
3895
|
|
upgini/metrics.py
CHANGED
@@ -99,8 +99,7 @@ LIGHTGBM_REGRESSION_PARAMS = {
|
|
99
99
|
"min_sum_hessian_in_leaf": 0.01,
|
100
100
|
"objective": "huber",
|
101
101
|
"deterministic": "true",
|
102
|
-
"force_col_wise": "true",
|
103
|
-
"force_row_wise": "true",
|
102
|
+
# "force_col_wise": "true",
|
104
103
|
"verbosity": -1,
|
105
104
|
}
|
106
105
|
|
@@ -120,8 +119,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
120
119
|
"num_grad_quant_bins": "8",
|
121
120
|
"stochastic_rounding": "true",
|
122
121
|
"deterministic": "true",
|
123
|
-
"force_col_wise": "true",
|
124
|
-
"force_row_wise": "true",
|
122
|
+
# "force_col_wise": "true",
|
125
123
|
"verbosity": -1,
|
126
124
|
}
|
127
125
|
|
@@ -138,8 +136,7 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
138
136
|
"cat_smooth": 18,
|
139
137
|
"cat_l2": 8,
|
140
138
|
"deterministic": "true",
|
141
|
-
"force_col_wise": "true",
|
142
|
-
"force_row_wise": "true",
|
139
|
+
# "force_col_wise": "true",
|
143
140
|
"verbosity": -1,
|
144
141
|
}
|
145
142
|
|
@@ -148,33 +145,33 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
|
|
148
145
|
N_FOLDS = 5
|
149
146
|
BLOCKED_TS_TEST_SIZE = 0.2
|
150
147
|
|
151
|
-
NA_VALUES = [
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
]
|
176
|
-
|
177
|
-
NA_REPLACEMENT = "NA"
|
148
|
+
# NA_VALUES = [
|
149
|
+
# "",
|
150
|
+
# " ",
|
151
|
+
# " ",
|
152
|
+
# "#n/a",
|
153
|
+
# "#n/a n/a",
|
154
|
+
# "#na",
|
155
|
+
# "-1.#ind",
|
156
|
+
# "-1.#qnan",
|
157
|
+
# "-nan",
|
158
|
+
# "1.#ind",
|
159
|
+
# "1.#qnan",
|
160
|
+
# "n/a",
|
161
|
+
# "na",
|
162
|
+
# "null",
|
163
|
+
# "nan",
|
164
|
+
# "n/a",
|
165
|
+
# "nan",
|
166
|
+
# "none",
|
167
|
+
# "-",
|
168
|
+
# "undefined",
|
169
|
+
# "[[unknown]]",
|
170
|
+
# "[not provided]",
|
171
|
+
# "[unknown]",
|
172
|
+
# ]
|
173
|
+
|
174
|
+
# NA_REPLACEMENT = "NA"
|
178
175
|
|
179
176
|
SUPPORTED_CATBOOST_METRICS = {
|
180
177
|
s.upper(): s
|
@@ -761,8 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
761
758
|
params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
|
762
759
|
self.cat_features = _get_cat_features(x)
|
763
760
|
if self.cat_features:
|
764
|
-
x = fill_na_cat_features(x, self.cat_features)
|
765
|
-
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value
|
761
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
762
|
+
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
|
766
763
|
encoded = pd.DataFrame(
|
767
764
|
encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
768
765
|
)
|
@@ -776,7 +773,7 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
776
773
|
def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
|
777
774
|
x, y_numpy, params = super()._prepare_to_calculate(x, y)
|
778
775
|
if self.cat_features is not None:
|
779
|
-
x = fill_na_cat_features(x, self.cat_features)
|
776
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
780
777
|
if self.cat_encoder is not None:
|
781
778
|
x[self.cat_features] = pd.DataFrame(
|
782
779
|
self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
@@ -860,7 +857,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
|
|
860
857
|
self.cat_features = _get_cat_features(x)
|
861
858
|
num_features = [col for col in x.columns if col not in self.cat_features]
|
862
859
|
x[num_features] = x[num_features].fillna(-999)
|
863
|
-
x = fill_na_cat_features(x, self.cat_features)
|
860
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
864
861
|
# TODO use one-hot encoding if cardinality is less 50
|
865
862
|
for feature in self.cat_features:
|
866
863
|
x[feature] = x[feature].astype("category").cat.codes
|
@@ -873,7 +870,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
|
|
873
870
|
if self.cat_features is not None:
|
874
871
|
num_features = [col for col in x.columns if col not in self.cat_features]
|
875
872
|
x[num_features] = x[num_features].fillna(-999)
|
876
|
-
x = fill_na_cat_features(x, self.cat_features)
|
873
|
+
# x = fill_na_cat_features(x, self.cat_features)
|
877
874
|
# TODO use one-hot encoding if cardinality is less 50
|
878
875
|
for feature in self.cat_features:
|
879
876
|
x[feature] = x[feature].astype("category").cat.codes
|
@@ -1059,10 +1056,10 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
|
|
1059
1056
|
return mse if squared else np.sqrt(mse)
|
1060
1057
|
|
1061
1058
|
|
1062
|
-
def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1059
|
+
# def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1060
|
+
# for c in cat_features:
|
1061
|
+
# if c in df.columns:
|
1062
|
+
# df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
|
1063
|
+
# na_filter = df[c].str.lower().isin(NA_VALUES)
|
1064
|
+
# df.loc[na_filter, c] = NA_REPLACEMENT
|
1065
|
+
# return df
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
|
7
7
|
upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
|
10
10
|
upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.
|
73
|
+
upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
|
74
|
+
upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.80.dist-info/RECORD,,
|
File without changes
|
File without changes
|