upgini 1.2.42__py3-none-any.whl → 1.2.44__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.42"
1
+ __version__ = "1.2.44"
@@ -277,7 +277,7 @@ class FeaturesEnricher(TransformerMixin):
277
277
  dict()
278
278
  )
279
279
 
280
- validate_version(self.logger, self.__log_warning)
280
+ validate_version(self.logger)
281
281
 
282
282
  self.search_keys = search_keys or {}
283
283
  self.id_columns = id_columns
@@ -584,6 +584,7 @@ class FeaturesEnricher(TransformerMixin):
584
584
  Transformed dataframe, enriched with valuable features.
585
585
  """
586
586
 
587
+ self.warning_counter.reset()
587
588
  trace_id = str(uuid.uuid4())
588
589
  start_time = time.time()
589
590
  with MDC(trace_id=trace_id):
@@ -720,7 +721,7 @@ class FeaturesEnricher(TransformerMixin):
720
721
  X_new: pandas.DataFrame of shape (n_samples, n_features_new)
721
722
  Transformed dataframe, enriched with valuable features.
722
723
  """
723
-
724
+ self.warning_counter.reset()
724
725
  search_progress = SearchProgress(0.0, ProgressStage.START_TRANSFORM)
725
726
  if progress_callback is not None:
726
727
  progress_callback(search_progress)
@@ -1086,13 +1087,16 @@ class FeaturesEnricher(TransformerMixin):
1086
1087
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
1087
1088
  "quality_metrics_train_segment"
1088
1089
  ),
1089
- self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
1090
+ # self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
1091
+ # Show actually used for metrics dataset size
1092
+ self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
1090
1093
  }
1091
1094
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1092
1095
  validated_y
1093
1096
  ):
1094
1097
  train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1095
- np.mean(validated_y), 4
1098
+ # np.mean(validated_y), 4
1099
+ np.mean(y_sorted), 4
1096
1100
  )
1097
1101
  if etalon_metric is not None:
1098
1102
  train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
@@ -1153,13 +1157,14 @@ class FeaturesEnricher(TransformerMixin):
1153
1157
  else:
1154
1158
  eval_uplift = None
1155
1159
 
1156
- # effective_eval_set = eval_set if eval_set is not None else self.eval_set
1157
1160
  eval_metrics = {
1158
1161
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
1159
1162
  "quality_metrics_eval_segment"
1160
1163
  ).format(idx + 1),
1161
1164
  self.bundle.get("quality_metrics_rows_header"): _num_samples(
1162
- effective_eval_set[idx][0]
1165
+ # effective_eval_set[idx][0]
1166
+ # Use actually used for metrics dataset
1167
+ eval_X_sorted
1163
1168
  ),
1164
1169
  # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
1165
1170
  }
@@ -1167,7 +1172,9 @@ class FeaturesEnricher(TransformerMixin):
1167
1172
  validated_eval_set[idx][1]
1168
1173
  ):
1169
1174
  eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1170
- np.mean(validated_eval_set[idx][1]), 4
1175
+ # np.mean(validated_eval_set[idx][1]), 4
1176
+ # Use actually used for metrics dataset
1177
+ np.mean(eval_y_sorted), 4
1171
1178
  )
1172
1179
  if etalon_eval_metric is not None:
1173
1180
  eval_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
@@ -2527,11 +2534,9 @@ if response.status_code == 200:
2527
2534
  def __is_registered(self) -> bool:
2528
2535
  return self.api_key is not None and self.api_key != ""
2529
2536
 
2530
- def __log_warning(self, message: str, show_support_link: bool = False, is_red=False):
2537
+ def __log_warning(self, message: str, show_support_link: bool = False):
2531
2538
  warning_num = self.warning_counter.increment()
2532
2539
  formatted_message = f"WARNING #{warning_num}: {message}\n"
2533
- if is_red:
2534
- formatted_message = Format.RED + formatted_message + Format.END
2535
2540
  if show_support_link:
2536
2541
  self.__display_support_link(formatted_message)
2537
2542
  else:
@@ -2584,7 +2589,12 @@ if response.status_code == 200:
2584
2589
  checked_generate_features = []
2585
2590
  for gen_feature in self.generate_features:
2586
2591
  if gen_feature not in x_columns:
2587
- self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
2592
+ if gen_feature == self._get_phone_column(self.search_keys):
2593
+ raise ValidationError(
2594
+ self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
2595
+ )
2596
+ else:
2597
+ self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
2588
2598
  else:
2589
2599
  checked_generate_features.append(gen_feature)
2590
2600
  self.generate_features = checked_generate_features
@@ -167,6 +167,12 @@ class DateTimeSearchKeyConverter:
167
167
  # Drop intermediate columns if not needed
168
168
  df.drop(columns=["second", "minute", "hour"], inplace=True)
169
169
 
170
+ for generated_feature in self.generated_features[:]:
171
+ if df[generated_feature].dropna().nunique() <= 1:
172
+ self.logger.warning(f"Generated constant feature {generated_feature} will be dropped")
173
+ df.drop(columns=generated_feature, inplace=True)
174
+ self.generated_features.remove(generated_feature)
175
+
170
176
  df.drop(columns=seconds, inplace=True)
171
177
 
172
178
  if keep_time:
@@ -1,9 +1,10 @@
1
1
  import json
2
2
  import threading
3
- from typing import Callable, Optional
4
3
 
5
4
  import requests
6
5
 
6
+ from upgini.utils.format import Format
7
+
7
8
  try:
8
9
  from packaging.version import parse
9
10
  except ImportError:
@@ -31,18 +32,16 @@ def get_version(package, url_pattern=URL_PATTERN):
31
32
  return version
32
33
 
33
34
 
34
- def validate_version(logger: logging.Logger, warning_function: Optional[Callable[[str], None]] = None):
35
+ def validate_version(logger: logging.Logger):
35
36
  def task():
36
37
  try:
37
38
  current_version = parse(__version__)
38
39
  latest_version = get_version("upgini")
39
40
  if current_version < latest_version:
40
41
  msg = bundle.get("version_warning").format(current_version, latest_version)
41
- if warning_function:
42
- warning_function(msg, is_red=True)
43
- else:
44
- logger.warning(msg)
45
- print(msg)
42
+ formatted_message = Format.RED + msg + Format.END
43
+ logger.warning(msg)
44
+ print(formatted_message)
46
45
  except Exception:
47
46
  logger.warning("Failed to validate version", exc_info=True)
48
47
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.42
3
+ Version: 1.2.44
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,16 +1,16 @@
1
- upgini/__about__.py,sha256=Ng7mLXI95GEYWOWi3HpHuVabVYSs3igDIcqorkuDZds,23
1
+ upgini/__about__.py,sha256=tZ6_k-1uqq2cr3SCnYO9N-chw-LhbafQ_arSm_DOlLs,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=d9VlOs9hTf6eL8TX_9bO400HQj3y_jVGthABvQJqONs,33350
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=AiAaMc6f1EFufMcxh3In1LSe3Qia8y0La-p3pzJt3Es,198494
6
+ upgini/features_enricher.py,sha256=QQaK682uFjrkvDOt-ub7UFMAEy6SVjPQubb6dY_7moE,199109
7
7
  upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
10
10
  upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
- upgini/version_validator.py,sha256=KnmBeEqHMxBDCDT_muCx-cevxesg5YwD15NHHy7d0RE,1607
13
+ upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
14
14
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
15
15
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
16
16
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -43,7 +43,7 @@ upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl
43
43
  upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk,6937
44
44
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
45
45
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
46
- upgini/utils/datetime_utils.py,sha256=F61i2vZCB6eUy4WwodDyPi50XKPbhOHsxDrU6tGa6CM,13133
46
+ upgini/utils/datetime_utils.py,sha256=RVAk4_rakK8X9zjybK3-rj0to0e3elye8tnBuA4wTWU,13491
47
47
  upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
48
48
  upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
49
49
  upgini/utils/email_utils.py,sha256=GbnhHJn1nhUBytmK6PophYqaoq4t7Lp6i0-O0Gd3RV8,5265
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.42.dist-info/METADATA,sha256=HNZlJ9FLp00lrSazHWbRSYV_O2yLYXDvsjSaE-Z7nYU,49055
63
- upgini-1.2.42.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
- upgini-1.2.42.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.42.dist-info/RECORD,,
62
+ upgini-1.2.44.dist-info/METADATA,sha256=2IrvXuScwwbpIwePrfDjVpZN4jiz3STJhs4vET2Hl40,49055
63
+ upgini-1.2.44.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
+ upgini-1.2.44.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.44.dist-info/RECORD,,