upgini 1.2.43__tar.gz → 1.2.45__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67) hide show
  1. {upgini-1.2.43 → upgini-1.2.45}/PKG-INFO +1 -1
  2. upgini-1.2.45/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/dataset.py +4 -4
  4. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/features_enricher.py +21 -11
  5. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/version_validator.py +6 -7
  6. upgini-1.2.43/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.43 → upgini-1.2.45}/.gitignore +0 -0
  8. {upgini-1.2.43 → upgini-1.2.45}/LICENSE +0 -0
  9. {upgini-1.2.43 → upgini-1.2.45}/README.md +0 -0
  10. {upgini-1.2.43 → upgini-1.2.45}/pyproject.toml +0 -0
  11. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/all_operands.py +0 -0
  17. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/data_source/data_source_publisher.py +0 -0
  26. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/errors.py +0 -0
  27. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/http.py +0 -0
  28. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/lazy_import.py +0 -0
  29. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/mdc/__init__.py +0 -0
  30. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/mdc/context.py +0 -0
  31. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/metadata.py +0 -0
  32. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/metrics.py +0 -0
  33. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/normalizer/normalize_utils.py +0 -0
  35. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/datetime_utils.py +0 -0
  53. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/email_utils.py +0 -0
  56. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/fallback_progress_bar.py +0 -0
  57. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/feature_info.py +0 -0
  58. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/features_validator.py +0 -0
  59. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/format.py +0 -0
  60. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/ip_utils.py +0 -0
  61. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/phone_utils.py +0 -0
  62. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/postal_code_utils.py +0 -0
  63. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/progress_bar.py +0 -0
  64. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/sklearn_ext.py +0 -0
  65. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/target_utils.py +0 -0
  66. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/track_info.py +0 -0
  67. {upgini-1.2.43 → upgini-1.2.45}/src/upgini/utils/warning_counter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.43
3
+ Version: 1.2.45
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.45"
@@ -33,7 +33,6 @@ from upgini.metadata import (
33
33
  NumericInterval,
34
34
  RuntimeParameters,
35
35
  SearchCustomization,
36
- SearchKey,
37
36
  )
38
37
  from upgini.resource_bundle import ResourceBundle, get_custom_bundle
39
38
  from upgini.search_task import SearchTask
@@ -646,8 +645,9 @@ class Dataset: # (pd.DataFrame):
646
645
  with tempfile.TemporaryDirectory() as tmp_dir:
647
646
  parquet_file_path = self.prepare_uploading_file(tmp_dir)
648
647
  time.sleep(1) # this is necessary to avoid requests rate limit restrictions
649
- time_left = time.time() - start_time
650
- search_progress = SearchProgress(2.0, ProgressStage.CREATING_FIT, time_left)
648
+ # If previous steps were too fast, time estimation could be calculated incorrectly
649
+ time_left = max(time.time() - start_time, 20)
650
+ search_progress = SearchProgress(1.0, ProgressStage.CREATING_FIT, time_left)
651
651
  if progress_bar is not None:
652
652
  progress_bar.progress = search_progress.to_progress_bar()
653
653
  if progress_callback is not None:
@@ -699,7 +699,7 @@ class Dataset: # (pd.DataFrame):
699
699
  runtime_parameters=runtime_parameters,
700
700
  metrics_calculation=metrics_calculation,
701
701
  )
702
- seconds_left = time.time() - start_time
702
+ seconds_left = max(time.time() - start_time, 20)
703
703
  search_progress = SearchProgress(1.0, ProgressStage.CREATING_TRANSFORM, seconds_left)
704
704
  if progress_bar is not None:
705
705
  progress_bar.progress = search_progress.to_progress_bar()
@@ -277,7 +277,7 @@ class FeaturesEnricher(TransformerMixin):
277
277
  dict()
278
278
  )
279
279
 
280
- validate_version(self.logger, self.__log_warning)
280
+ validate_version(self.logger)
281
281
 
282
282
  self.search_keys = search_keys or {}
283
283
  self.id_columns = id_columns
@@ -584,6 +584,7 @@ class FeaturesEnricher(TransformerMixin):
584
584
  Transformed dataframe, enriched with valuable features.
585
585
  """
586
586
 
587
+ self.warning_counter.reset()
587
588
  trace_id = str(uuid.uuid4())
588
589
  start_time = time.time()
589
590
  with MDC(trace_id=trace_id):
@@ -720,7 +721,7 @@ class FeaturesEnricher(TransformerMixin):
720
721
  X_new: pandas.DataFrame of shape (n_samples, n_features_new)
721
722
  Transformed dataframe, enriched with valuable features.
722
723
  """
723
-
724
+ self.warning_counter.reset()
724
725
  search_progress = SearchProgress(0.0, ProgressStage.START_TRANSFORM)
725
726
  if progress_callback is not None:
726
727
  progress_callback(search_progress)
@@ -1086,13 +1087,16 @@ class FeaturesEnricher(TransformerMixin):
1086
1087
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
1087
1088
  "quality_metrics_train_segment"
1088
1089
  ),
1089
- self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
1090
+ # self.bundle.get("quality_metrics_rows_header"): _num_samples(effective_X),
1091
+ # Show the size of the dataset actually used for metrics
1092
+ self.bundle.get("quality_metrics_rows_header"): _num_samples(fitting_X),
1090
1093
  }
1091
1094
  if model_task_type in [ModelTaskType.BINARY, ModelTaskType.REGRESSION] and is_numeric_dtype(
1092
1095
  validated_y
1093
1096
  ):
1094
1097
  train_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1095
- np.mean(validated_y), 4
1098
+ # np.mean(validated_y), 4
1099
+ np.mean(y_sorted), 4
1096
1100
  )
1097
1101
  if etalon_metric is not None:
1098
1102
  train_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = etalon_metric
@@ -1153,13 +1157,14 @@ class FeaturesEnricher(TransformerMixin):
1153
1157
  else:
1154
1158
  eval_uplift = None
1155
1159
 
1156
- # effective_eval_set = eval_set if eval_set is not None else self.eval_set
1157
1160
  eval_metrics = {
1158
1161
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
1159
1162
  "quality_metrics_eval_segment"
1160
1163
  ).format(idx + 1),
1161
1164
  self.bundle.get("quality_metrics_rows_header"): _num_samples(
1162
- effective_eval_set[idx][0]
1165
+ # effective_eval_set[idx][0]
1166
+ # Use the dataset actually used for metrics
1167
+ eval_X_sorted
1163
1168
  ),
1164
1169
  # self.bundle.get("quality_metrics_match_rate_header"): eval_hit_rate,
1165
1170
  }
@@ -1167,7 +1172,9 @@ class FeaturesEnricher(TransformerMixin):
1167
1172
  validated_eval_set[idx][1]
1168
1173
  ):
1169
1174
  eval_metrics[self.bundle.get("quality_metrics_mean_target_header")] = round(
1170
- np.mean(validated_eval_set[idx][1]), 4
1175
+ # np.mean(validated_eval_set[idx][1]), 4
1176
+ # Use the target values actually used for metrics
1177
+ np.mean(eval_y_sorted), 4
1171
1178
  )
1172
1179
  if etalon_eval_metric is not None:
1173
1180
  eval_metrics[self.bundle.get("quality_metrics_baseline_header").format(metric)] = (
@@ -2527,11 +2534,9 @@ if response.status_code == 200:
2527
2534
  def __is_registered(self) -> bool:
2528
2535
  return self.api_key is not None and self.api_key != ""
2529
2536
 
2530
- def __log_warning(self, message: str, show_support_link: bool = False, is_red=False):
2537
+ def __log_warning(self, message: str, show_support_link: bool = False):
2531
2538
  warning_num = self.warning_counter.increment()
2532
2539
  formatted_message = f"WARNING #{warning_num}: {message}\n"
2533
- if is_red:
2534
- formatted_message = Format.RED + formatted_message + Format.END
2535
2540
  if show_support_link:
2536
2541
  self.__display_support_link(formatted_message)
2537
2542
  else:
@@ -2584,7 +2589,12 @@ if response.status_code == 200:
2584
2589
  checked_generate_features = []
2585
2590
  for gen_feature in self.generate_features:
2586
2591
  if gen_feature not in x_columns:
2587
- self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
2592
+ if gen_feature == self._get_phone_column(self.search_keys):
2593
+ raise ValidationError(
2594
+ self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
2595
+ )
2596
+ else:
2597
+ self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
2588
2598
  else:
2589
2599
  checked_generate_features.append(gen_feature)
2590
2600
  self.generate_features = checked_generate_features
@@ -1,9 +1,10 @@
1
1
  import json
2
2
  import threading
3
- from typing import Callable, Optional
4
3
 
5
4
  import requests
6
5
 
6
+ from upgini.utils.format import Format
7
+
7
8
  try:
8
9
  from packaging.version import parse
9
10
  except ImportError:
@@ -31,18 +32,16 @@ def get_version(package, url_pattern=URL_PATTERN):
31
32
  return version
32
33
 
33
34
 
34
- def validate_version(logger: logging.Logger, warning_function: Optional[Callable[[str], None]] = None):
35
+ def validate_version(logger: logging.Logger):
35
36
  def task():
36
37
  try:
37
38
  current_version = parse(__version__)
38
39
  latest_version = get_version("upgini")
39
40
  if current_version < latest_version:
40
41
  msg = bundle.get("version_warning").format(current_version, latest_version)
41
- if warning_function:
42
- warning_function(msg, is_red=True)
43
- else:
44
- logger.warning(msg)
45
- print(msg)
42
+ formatted_message = Format.RED + msg + Format.END
43
+ logger.warning(msg)
44
+ print(formatted_message)
46
45
  except Exception:
47
46
  logger.warning("Failed to validate version", exc_info=True)
48
47
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.43"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes