upgini 1.2.79a1__py3-none-any.whl → 1.2.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.79a1"
1
+ __version__ = "1.2.80"
@@ -1119,7 +1119,7 @@ class FeaturesEnricher(TransformerMixin):
1119
1119
  self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
1120
1120
  }
1121
1121
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1122
- validated_y
1122
+ y_sorted
1123
1123
  ):
1124
1124
  train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1125
1125
  # np.mean(validated_y), 4
@@ -1197,7 +1197,7 @@ class FeaturesEnricher(TransformerMixin):
1197
1197
  # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
1198
1198
  }
1199
1199
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1200
- validated_eval_set[idx][1]
1200
+ eval_y_sorted
1201
1201
  ):
1202
1202
  eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1203
1203
  # np.mean(validated_eval_set[idx][1]), 4
@@ -3886,9 +3886,10 @@ if response.status_code == 200:
3886
3886
  if updated_shaps is not None:
3887
3887
  updating_shap = updated_shaps.get(feature_meta.name)
3888
3888
  if updating_shap is None:
3889
- self.logger.warning(
3890
- f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
3891
- )
3889
+ if feature_meta.shap_value != 0.0:
3890
+ self.logger.warning(
3891
+ f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
3892
+ )
3892
3893
  updating_shap = 0.0
3893
3894
  feature_meta.shap_value = updating_shap
3894
3895
 
upgini/metrics.py CHANGED
@@ -99,8 +99,7 @@ LIGHTGBM_REGRESSION_PARAMS = {
99
99
  "min_sum_hessian_in_leaf": 0.01,
100
100
  "objective": "huber",
101
101
  "deterministic": "true",
102
- "force_col_wise": "true",
103
- "force_row_wise": "true",
102
+ # "force_col_wise": "true",
104
103
  "verbosity": -1,
105
104
  }
106
105
 
@@ -120,8 +119,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
120
119
  "num_grad_quant_bins": "8",
121
120
  "stochastic_rounding": "true",
122
121
  "deterministic": "true",
123
- "force_col_wise": "true",
124
- "force_row_wise": "true",
122
+ # "force_col_wise": "true",
125
123
  "verbosity": -1,
126
124
  }
127
125
 
@@ -138,8 +136,7 @@ LIGHTGBM_BINARY_PARAMS = {
138
136
  "cat_smooth": 18,
139
137
  "cat_l2": 8,
140
138
  "deterministic": "true",
141
- "force_col_wise": "true",
142
- "force_row_wise": "true",
139
+ # "force_col_wise": "true",
143
140
  "verbosity": -1,
144
141
  }
145
142
 
@@ -148,33 +145,33 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
148
145
  N_FOLDS = 5
149
146
  BLOCKED_TS_TEST_SIZE = 0.2
150
147
 
151
- NA_VALUES = [
152
- "",
153
- " ",
154
- " ",
155
- "#n/a",
156
- "#n/a n/a",
157
- "#na",
158
- "-1.#ind",
159
- "-1.#qnan",
160
- "-nan",
161
- "1.#ind",
162
- "1.#qnan",
163
- "n/a",
164
- "na",
165
- "null",
166
- "nan",
167
- "n/a",
168
- "nan",
169
- "none",
170
- "-",
171
- "undefined",
172
- "[[unknown]]",
173
- "[not provided]",
174
- "[unknown]",
175
- ]
176
-
177
- NA_REPLACEMENT = "NA"
148
+ # NA_VALUES = [
149
+ # "",
150
+ # " ",
151
+ # " ",
152
+ # "#n/a",
153
+ # "#n/a n/a",
154
+ # "#na",
155
+ # "-1.#ind",
156
+ # "-1.#qnan",
157
+ # "-nan",
158
+ # "1.#ind",
159
+ # "1.#qnan",
160
+ # "n/a",
161
+ # "na",
162
+ # "null",
163
+ # "nan",
164
+ # "n/a",
165
+ # "nan",
166
+ # "none",
167
+ # "-",
168
+ # "undefined",
169
+ # "[[unknown]]",
170
+ # "[not provided]",
171
+ # "[unknown]",
172
+ # ]
173
+
174
+ # NA_REPLACEMENT = "NA"
178
175
 
179
176
  SUPPORTED_CATBOOST_METRICS = {
180
177
  s.upper(): s
@@ -761,8 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
761
758
  params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
762
759
  self.cat_features = _get_cat_features(x)
763
760
  if self.cat_features:
764
- x = fill_na_cat_features(x, self.cat_features)
765
- encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
761
+ # x = fill_na_cat_features(x, self.cat_features)
762
+ encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
766
763
  encoded = pd.DataFrame(
767
764
  encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
768
765
  )
@@ -776,7 +773,7 @@ class LightGBMWrapper(EstimatorWrapper):
776
773
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
777
774
  x, y_numpy, params = super()._prepare_to_calculate(x, y)
778
775
  if self.cat_features is not None:
779
- x = fill_na_cat_features(x, self.cat_features)
776
+ # x = fill_na_cat_features(x, self.cat_features)
780
777
  if self.cat_encoder is not None:
781
778
  x[self.cat_features] = pd.DataFrame(
782
779
  self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
@@ -860,7 +857,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
860
857
  self.cat_features = _get_cat_features(x)
861
858
  num_features = [col for col in x.columns if col not in self.cat_features]
862
859
  x[num_features] = x[num_features].fillna(-999)
863
- x = fill_na_cat_features(x, self.cat_features)
860
+ # x = fill_na_cat_features(x, self.cat_features)
864
861
  # TODO use one-hot encoding if cardinality is less 50
865
862
  for feature in self.cat_features:
866
863
  x[feature] = x[feature].astype("category").cat.codes
@@ -873,7 +870,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
873
870
  if self.cat_features is not None:
874
871
  num_features = [col for col in x.columns if col not in self.cat_features]
875
872
  x[num_features] = x[num_features].fillna(-999)
876
- x = fill_na_cat_features(x, self.cat_features)
873
+ # x = fill_na_cat_features(x, self.cat_features)
877
874
  # TODO use one-hot encoding if cardinality is less 50
878
875
  for feature in self.cat_features:
879
876
  x[feature] = x[feature].astype("category").cat.codes
@@ -1059,10 +1056,10 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
1059
1056
  return mse if squared else np.sqrt(mse)
1060
1057
 
1061
1058
 
1062
- def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
1063
- for c in cat_features:
1064
- if c in df.columns:
1065
- df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
1066
- na_filter = df[c].str.lower().isin(NA_VALUES)
1067
- df.loc[na_filter, c] = NA_REPLACEMENT
1068
- return df
1059
+ # def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
1060
+ # for c in cat_features:
1061
+ # if c in df.columns:
1062
+ # df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
1063
+ # na_filter = df[c].str.lower().isin(NA_VALUES)
1064
+ # df.loc[na_filter, c] = NA_REPLACEMENT
1065
+ # return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.79a1
3
+ Version: 1.2.80
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=h3pHSW7QFH3c863fq8fxK5FCQiwhFda3blWAzoxplSE,25
1
+ upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=_UkJS35uGaYtI7dR6Xd9Q28nmiPzTjhK3y8v3IjJTfQ,208245
6
+ upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
7
7
  upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=l7r4cM-xrftcgOTv4uMQBHC_Sd820Z6umw5bIpP5wDI,39384
9
+ upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
10
10
  upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.79a1.dist-info/METADATA,sha256=49MF6sCtAqdDrgL7s0hY2fm7T0ma0A5yeJQ6oIokZDo,49093
74
- upgini-1.2.79a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.79a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.79a1.dist-info/RECORD,,
73
+ upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
74
+ upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.80.dist-info/RECORD,,