upgini 1.2.79__py3-none-any.whl → 1.2.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.79"
1
+ __version__ = "1.2.80"
@@ -1119,7 +1119,7 @@ class FeaturesEnricher(TransformerMixin):
1119
1119
  self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
1120
1120
  }
1121
1121
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1122
- validated_y
1122
+ y_sorted
1123
1123
  ):
1124
1124
  train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1125
1125
  # np.mean(validated_y), 4
@@ -1197,7 +1197,7 @@ class FeaturesEnricher(TransformerMixin):
1197
1197
  # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
1198
1198
  }
1199
1199
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1200
- validated_eval_set[idx][1]
1200
+ eval_y_sorted
1201
1201
  ):
1202
1202
  eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1203
1203
  # np.mean(validated_eval_set[idx][1]), 4
@@ -3886,9 +3886,10 @@ if response.status_code == 200:
3886
3886
  if updated_shaps is not None:
3887
3887
  updating_shap = updated_shaps.get(feature_meta.name)
3888
3888
  if updating_shap is None:
3889
- self.logger.warning(
3890
- f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
3891
- )
3889
+ if feature_meta.shap_value != 0.0:
3890
+ self.logger.warning(
3891
+ f"WARNING: Shap value for feature {feature_meta.name} not found and will be set to 0.0"
3892
+ )
3892
3893
  updating_shap = 0.0
3893
3894
  feature_meta.shap_value = updating_shap
3894
3895
 
upgini/metrics.py CHANGED
@@ -99,7 +99,7 @@ LIGHTGBM_REGRESSION_PARAMS = {
99
99
  "min_sum_hessian_in_leaf": 0.01,
100
100
  "objective": "huber",
101
101
  "deterministic": "true",
102
- "force_col_wise": "true",
102
+ # "force_col_wise": "true",
103
103
  "verbosity": -1,
104
104
  }
105
105
 
@@ -119,7 +119,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
119
119
  "num_grad_quant_bins": "8",
120
120
  "stochastic_rounding": "true",
121
121
  "deterministic": "true",
122
- "force_col_wise": "true",
122
+ # "force_col_wise": "true",
123
123
  "verbosity": -1,
124
124
  }
125
125
 
@@ -136,7 +136,7 @@ LIGHTGBM_BINARY_PARAMS = {
136
136
  "cat_smooth": 18,
137
137
  "cat_l2": 8,
138
138
  "deterministic": "true",
139
- "force_col_wise": "true",
139
+ # "force_col_wise": "true",
140
140
  "verbosity": -1,
141
141
  }
142
142
 
@@ -145,33 +145,33 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
145
145
  N_FOLDS = 5
146
146
  BLOCKED_TS_TEST_SIZE = 0.2
147
147
 
148
- NA_VALUES = [
149
- "",
150
- " ",
151
- " ",
152
- "#n/a",
153
- "#n/a n/a",
154
- "#na",
155
- "-1.#ind",
156
- "-1.#qnan",
157
- "-nan",
158
- "1.#ind",
159
- "1.#qnan",
160
- "n/a",
161
- "na",
162
- "null",
163
- "nan",
164
- "n/a",
165
- "nan",
166
- "none",
167
- "-",
168
- "undefined",
169
- "[[unknown]]",
170
- "[not provided]",
171
- "[unknown]",
172
- ]
173
-
174
- NA_REPLACEMENT = "NA"
148
+ # NA_VALUES = [
149
+ # "",
150
+ # " ",
151
+ # " ",
152
+ # "#n/a",
153
+ # "#n/a n/a",
154
+ # "#na",
155
+ # "-1.#ind",
156
+ # "-1.#qnan",
157
+ # "-nan",
158
+ # "1.#ind",
159
+ # "1.#qnan",
160
+ # "n/a",
161
+ # "na",
162
+ # "null",
163
+ # "nan",
164
+ # "n/a",
165
+ # "nan",
166
+ # "none",
167
+ # "-",
168
+ # "undefined",
169
+ # "[[unknown]]",
170
+ # "[not provided]",
171
+ # "[unknown]",
172
+ # ]
173
+
174
+ # NA_REPLACEMENT = "NA"
175
175
 
176
176
  SUPPORTED_CATBOOST_METRICS = {
177
177
  s.upper(): s
@@ -758,8 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
758
758
  params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
759
759
  self.cat_features = _get_cat_features(x)
760
760
  if self.cat_features:
761
- x = fill_na_cat_features(x, self.cat_features)
762
- encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
761
+ # x = fill_na_cat_features(x, self.cat_features)
762
+ encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan)
763
763
  encoded = pd.DataFrame(
764
764
  encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
765
765
  )
@@ -773,7 +773,7 @@ class LightGBMWrapper(EstimatorWrapper):
773
773
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
774
774
  x, y_numpy, params = super()._prepare_to_calculate(x, y)
775
775
  if self.cat_features is not None:
776
- x = fill_na_cat_features(x, self.cat_features)
776
+ # x = fill_na_cat_features(x, self.cat_features)
777
777
  if self.cat_encoder is not None:
778
778
  x[self.cat_features] = pd.DataFrame(
779
779
  self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
@@ -857,7 +857,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
857
857
  self.cat_features = _get_cat_features(x)
858
858
  num_features = [col for col in x.columns if col not in self.cat_features]
859
859
  x[num_features] = x[num_features].fillna(-999)
860
- x = fill_na_cat_features(x, self.cat_features)
860
+ # x = fill_na_cat_features(x, self.cat_features)
861
861
  # TODO use one-hot encoding if cardinality is less 50
862
862
  for feature in self.cat_features:
863
863
  x[feature] = x[feature].astype("category").cat.codes
@@ -870,7 +870,7 @@ class OtherEstimatorWrapper(EstimatorWrapper):
870
870
  if self.cat_features is not None:
871
871
  num_features = [col for col in x.columns if col not in self.cat_features]
872
872
  x[num_features] = x[num_features].fillna(-999)
873
- x = fill_na_cat_features(x, self.cat_features)
873
+ # x = fill_na_cat_features(x, self.cat_features)
874
874
  # TODO use one-hot encoding if cardinality is less 50
875
875
  for feature in self.cat_features:
876
876
  x[feature] = x[feature].astype("category").cat.codes
@@ -1056,10 +1056,10 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
1056
1056
  return mse if squared else np.sqrt(mse)
1057
1057
 
1058
1058
 
1059
- def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
1060
- for c in cat_features:
1061
- if c in df.columns:
1062
- df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
1063
- na_filter = df[c].str.lower().isin(NA_VALUES)
1064
- df.loc[na_filter, c] = NA_REPLACEMENT
1065
- return df
1059
+ # def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
1060
+ # for c in cat_features:
1061
+ # if c in df.columns:
1062
+ # df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
1063
+ # na_filter = df[c].str.lower().isin(NA_VALUES)
1064
+ # df.loc[na_filter, c] = NA_REPLACEMENT
1065
+ # return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.79
3
+ Version: 1.2.80
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=mupwAhPLfGDd9OAn7f6J2lwQapeaIysxn41khUOG57I,23
1
+ upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=_UkJS35uGaYtI7dR6Xd9Q28nmiPzTjhK3y8v3IjJTfQ,208245
6
+ upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
7
7
  upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=_Ue1nymBMVmoCdPMGGXe0FFkvNxNg592FmX2WJWKDFE,39294
9
+ upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
10
10
  upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.79.dist-info/METADATA,sha256=zxQyk76bWj5AGiXERdfEQVcqsZXD4RU5gaRQGk1LpeM,49091
74
- upgini-1.2.79.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.79.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.79.dist-info/RECORD,,
73
+ upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
74
+ upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.80.dist-info/RECORD,,