upgini 1.1.274a3__tar.gz → 1.1.274a4__tar.gz

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (85)
  1. {upgini-1.1.274a3/src/upgini.egg-info → upgini-1.1.274a4}/PKG-INFO +1 -1
  2. {upgini-1.1.274a3 → upgini-1.1.274a4}/setup.py +1 -1
  3. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/features_enricher.py +19 -9
  4. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/resource_bundle/strings.properties +1 -0
  5. {upgini-1.1.274a3 → upgini-1.1.274a4/src/upgini.egg-info}/PKG-INFO +1 -1
  6. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_features_enricher.py +13 -7
  7. {upgini-1.1.274a3 → upgini-1.1.274a4}/LICENSE +0 -0
  8. {upgini-1.1.274a3 → upgini-1.1.274a4}/README.md +0 -0
  9. {upgini-1.1.274a3 → upgini-1.1.274a4}/pyproject.toml +0 -0
  10. {upgini-1.1.274a3 → upgini-1.1.274a4}/setup.cfg +0 -0
  11. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/__init__.py +0 -0
  12. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/ads.py +0 -0
  13. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/all_operands.py +0 -0
  17. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/data_source/data_source_publisher.py +0 -0
  26. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/dataset.py +0 -0
  27. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/errors.py +0 -0
  28. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/fingerprint.js +0 -0
  29. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/http.py +0 -0
  30. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/metadata.py +0 -0
  33. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/metrics.py +0 -0
  34. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/normalizer/phone_normalizer.py +0 -0
  36. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/search_task.py +0 -0
  44. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/spinner.py +0 -0
  45. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/deduplicate_utils.py +0 -0
  53. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/display_utils.py +0 -0
  54. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/email_utils.py +0 -0
  55. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/fallback_progress_bar.py +0 -0
  56. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/features_validator.py +0 -0
  57. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/format.py +0 -0
  58. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/ip_utils.py +0 -0
  59. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/phone_utils.py +0 -0
  60. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/postal_code_utils.py +0 -0
  61. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/progress_bar.py +0 -0
  62. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/sklearn_ext.py +0 -0
  63. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/target_utils.py +0 -0
  64. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/track_info.py +0 -0
  65. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/utils/warning_counter.py +0 -0
  66. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini/version_validator.py +0 -0
  67. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini.egg-info/SOURCES.txt +0 -0
  68. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini.egg-info/dependency_links.txt +0 -0
  69. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini.egg-info/requires.txt +0 -0
  70. {upgini-1.1.274a3 → upgini-1.1.274a4}/src/upgini.egg-info/top_level.txt +0 -0
  71. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_autofe_operands.py +0 -0
  72. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_binary_dataset.py +0 -0
  73. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_blocked_time_series.py +0 -0
  74. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_categorical_dataset.py +0 -0
  75. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_continuous_dataset.py +0 -0
  76. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_country_utils.py +0 -0
  77. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_custom_loss_utils.py +0 -0
  78. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_datetime_utils.py +0 -0
  79. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_email_utils.py +0 -0
  80. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_etalon_validation.py +0 -0
  81. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_metrics.py +0 -0
  82. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_phone_utils.py +0 -0
  83. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_postal_code_utils.py +0 -0
  84. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_target_utils.py +0 -0
  85. {upgini-1.1.274a3 → upgini-1.1.274a4}/tests/test_widget.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.274a3
3
+ Version: 1.1.274a4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -40,7 +40,7 @@ def send_log(msg: str):
40
40
 
41
41
 
42
42
  here = Path(__file__).parent.resolve()
43
- version = "1.1.274a3"
43
+ version = "1.1.274a4"
44
44
  try:
45
45
  send_log(f"Start setup PyLib version {version}")
46
46
  setup(
@@ -1809,12 +1809,10 @@ class FeaturesEnricher(TransformerMixin):
1809
1809
  features_section = ""
1810
1810
 
1811
1811
  search_id = self._search_task.search_task_id
1812
- api_example = (
1813
- f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
1812
+ api_example = f"""curl 'https://search.upgini.com/online/api/http_inference_trigger?search_id={search_id}' \\
1814
1813
  -H 'Authorization: {self.api_key}' \\
1815
1814
  -H 'Content-Type: application/json' \\
1816
1815
  -d '{{"search_keys": {keys}{features_section}}}'"""
1817
- )
1818
1816
  return api_example
1819
1817
 
1820
1818
  def _get_copy_of_runtime_parameters(self) -> RuntimeParameters:
@@ -1910,8 +1908,7 @@ class FeaturesEnricher(TransformerMixin):
1910
1908
  else:
1911
1909
  self.logger.info("Input dataset hasn't date column")
1912
1910
  if self.add_date_if_missing:
1913
- df = self._add_current_date_as_key(df)
1914
- search_keys[self.CURRENT_DATE] = SearchKey.DATE
1911
+ df = self._add_current_date_as_key(df, search_keys, self.logger, self.bundle)
1915
1912
  email_column = self._get_email_column(search_keys)
1916
1913
  hem_column = self._get_hem_column(search_keys)
1917
1914
  email_converted_to_hem = False
@@ -2282,8 +2279,7 @@ class FeaturesEnricher(TransformerMixin):
2282
2279
  else:
2283
2280
  self.logger.info("Input dataset hasn't date column")
2284
2281
  if self.add_date_if_missing:
2285
- df = self._add_current_date_as_key(df)
2286
- self.fit_search_keys[self.CURRENT_DATE] = SearchKey.DATE
2282
+ df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2287
2283
  email_column = self._get_email_column(self.fit_search_keys)
2288
2284
  hem_column = self._get_hem_column(self.fit_search_keys)
2289
2285
  email_converted_to_hem = False
@@ -2865,8 +2861,22 @@ class FeaturesEnricher(TransformerMixin):
2865
2861
  return col
2866
2862
 
2867
2863
  @staticmethod
2868
- def _add_current_date_as_key(df: pd.DataFrame) -> pd.DataFrame:
2869
- df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
2864
+ def _add_current_date_as_key(
2865
+ df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
2866
+ ) -> pd.DataFrame:
2867
+ if (
2868
+ set(search_keys.values()) == {SearchKey.PHONE}
2869
+ or set(search_keys.values()) == {SearchKey.EMAIL}
2870
+ or set(search_keys.values()) == {SearchKey.HEM}
2871
+ or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
2872
+ ):
2873
+ msg = bundle.get("current_date_added")
2874
+ print(msg)
2875
+ logger.warning(msg)
2876
+ df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
2877
+ search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
2878
+ converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE, None, logger, bundle)
2879
+ df = converter.convert(df)
2870
2880
  return df
2871
2881
 
2872
2882
  @staticmethod
@@ -38,6 +38,7 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
38
38
  loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
39
39
  multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
40
40
  group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
41
+ current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
41
42
 
42
43
  # Errors
43
44
  failed_search_by_task_id=Failed to retrieve the specified search results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.274a3
3
+ Version: 1.1.274a4
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -487,10 +487,7 @@ def test_saved_features_enricher(requests_mock: Mocker):
487
487
  train_random_indices = random.choice(train_target.index, size=9000, replace=False)
488
488
  train_target.loc[train_random_indices] = 0
489
489
 
490
- metrics = enricher.calculate_metrics(
491
- train_features,
492
- train_target
493
- )
490
+ metrics = enricher.calculate_metrics(train_features, train_target)
494
491
  expected_metrics = pd.DataFrame(
495
492
  {
496
493
  segment_header: [train_segment],
@@ -2230,8 +2227,9 @@ def test_email_search_key(requests_mock: Mocker):
2230
2227
  "hashed_email_64ff8c",
2231
2228
  "email_one_domain_3b0a68",
2232
2229
  "email_domain_10c73f",
2230
+ "current_date_b993c4",
2233
2231
  }
2234
- assert {"hashed_email_64ff8c", "email_one_domain_3b0a68"} == {
2232
+ assert {"hashed_email_64ff8c", "email_one_domain_3b0a68", "current_date_b993c4"} == {
2235
2233
  sk for sublist in self.search_keys for sk in sublist
2236
2234
  }
2237
2235
  raise TestException()
@@ -2276,10 +2274,18 @@ def test_composit_index_search_key(requests_mock: Mocker):
2276
2274
  **kwargs,
2277
2275
  ):
2278
2276
  self.validate()
2279
- assert set(self.columns.to_list()) == {"system_record_id", "country_aff64e", "postal_code_13534a", "target"}
2277
+ assert set(self.columns.to_list()) == {
2278
+ "system_record_id",
2279
+ "country_aff64e",
2280
+ "postal_code_13534a",
2281
+ "current_date_b993c4",
2282
+ "target",
2283
+ }
2280
2284
  assert "country_aff64e" in self.columns
2281
2285
  assert "postal_code_13534a"
2282
- assert {"country_aff64e", "postal_code_13534a"} == {sk for sublist in self.search_keys for sk in sublist}
2286
+ assert {"country_aff64e", "postal_code_13534a", "current_date_b993c4"} == {
2287
+ sk for sublist in self.search_keys for sk in sublist
2288
+ }
2283
2289
  # assert "country_fake_a" in self.columns
2284
2290
  # assert "postal_code_fake_a" in self.columns
2285
2291
  # assert {"country_fake_a", "postal_code_fake_a"} == {sk for sublist in self.search_keys for sk in sublist}
File without changes
File without changes
File without changes
File without changes
File without changes